seq.asn
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:17k
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- NCBI Sequence elements
- -- by James Ostell, 1990
- -- Version 3.0 - June 1994
- --
- --**********************************************************************
- NCBI-Sequence DEFINITIONS ::=
- BEGIN
- EXPORTS Annotdesc, Bioseq, GIBB-mol, Heterogen, Numbering, Pubdesc,
- Seq-annot, Seq-descr, Seq-hist, Seq-literal, Seqdesc;
- IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
- Seq-align FROM NCBI-Seqalign
- Seq-feat FROM NCBI-Seqfeat
- Seq-graph FROM NCBI-Seqres
- Pub-equiv FROM NCBI-Pub
- Org-ref FROM NCBI-Organism
- BioSource FROM NCBI-BioSource
- Seq-id, Seq-loc FROM NCBI-Seqloc
- GB-block FROM GenBank-General
- PIR-block FROM PIR-General
- EMBL-block FROM EMBL-General
- SP-block FROM SP-General
- PRF-block FROM PRF-General
- PDB-block FROM PDB-General;
- --*** Sequence ********************************
- --*
- -- Bioseq: a single sequence record. It carries a set of identifiers,
- -- an optional descriptor set, the sequence instance, and optional
- -- Seq-annot values.
- Bioseq ::= SEQUENCE {
-     id    SET OF Seq-id ,               -- identifiers, all equivalent to one another
-     descr Seq-descr OPTIONAL ,          -- descriptor set
-     inst  Seq-inst ,                    -- the sequence data
-     annot SET OF Seq-annot OPTIONAL }   -- attached Seq-annot values
- --*** Descriptors *****************************
- --*
- -- Seq-descr: a set of Seqdesc descriptors.
- Seq-descr ::= SET OF Seqdesc
- -- Seqdesc: one descriptor; a value holds exactly one alternative.
- -- Per the note in this file, mol-type, modif, method and org are
- -- consolidated into Org-ref, BioSource and MolInfo and are slated
- -- for removal in later specifications.
- Seqdesc ::= CHOICE {
-     mol-type    GIBB-mol ,          -- molecule type
-     modif       SET OF GIBB-mod ,   -- modifier flags
-     method      GIBB-method ,       -- method used for sequencing
-     name        VisibleString ,     -- name of the sequence
-     title       VisibleString ,     -- title of the sequence
-     org         Org-ref ,           -- organism, when everything is from one organism
-     comment     VisibleString ,     -- longer free-text comment
-     num         Numbering ,         -- numbering system
-     maploc      Dbtag ,             -- map location of the sequence
-     pir         PIR-block ,         -- PIR specific information
-     genbank     GB-block ,          -- GenBank specific information
-     pub         Pubdesc ,           -- reference to the publication
-     region      VisibleString ,     -- overall region (globin locus)
-     user        User-object ,       -- user defined object
-     sp          SP-block ,          -- SWISSPROT specific information
-     dbxref      Dbtag ,             -- cross reference to other databases
-     embl        EMBL-block ,        -- EMBL specific information
-     create-date Date ,              -- date the entry was first created/released
-     update-date Date ,              -- date of the last update
-     prf         PRF-block ,         -- PRF specific information
-     pdb         PDB-block ,         -- PDB specific information
-     het         Heterogen ,         -- cofactor etc, associated but not bound
-     source      BioSource ,         -- source of materials, includes Org-ref
-     molinfo     MolInfo }           -- information on the molecule and techniques
- --******* NOTE:
- --* mol-type, modif, method, and org are consolidated and expanded
- --* in Org-ref, BioSource, and MolInfo in this specification. They
- --* will be removed in later specifications. Do not use them in the
- --* future. Instead expect the new structures.
- --*
- --***************************
- --********************************************************************
- --
- -- MolInfo gives information on the
- -- classification of the type and quality of the sequence
- --
- -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
- --
- --********************************************************************
- -- MolInfo: classifies a sequence by molecule type (biomol), by the
- -- technique that produced it (tech, with free text in techexp), and
- -- by completeness. All three coded fields default to unknown.
- MolInfo ::= SEQUENCE {
-     biomol INTEGER {
-         unknown (0) ,
-         genomic (1) ,
-         pre-RNA (2) ,              -- any kind of precursor RNA
-         mRNA (3) ,
-         rRNA (4) ,
-         tRNA (5) ,
-         snRNA (6) ,
-         scRNA (7) ,
-         peptide (8) ,
-         other-genetic (9) ,        -- other genetic material
-         genomic-mRNA (10) ,        -- reported mix of genomic and cdna sequence
-         cRNA (11) ,                -- viral RNA genome copy intermediate
-         snoRNA (12) ,              -- small nucleolar RNA
-         transcribed-RNA (13) ,     -- transcribed RNA outside the existing classes
-         other (255) } DEFAULT unknown ,
-     tech INTEGER {
-         unknown (0) ,
-         standard (1) ,             -- standard sequencing
-         est (2) ,                  -- Expressed Sequence Tag
-         sts (3) ,                  -- Sequence Tagged Site
-         survey (4) ,               -- one-pass genomic sequence
-         genemap (5) ,              -- from genetic mapping techniques
-         physmap (6) ,              -- from physical mapping techniques
-         derived (7) ,              -- derived from other data, not a primary entity
-         concept-trans (8) ,        -- conceptual translation
-         seq-pept (9) ,             -- the peptide itself was sequenced
-         both (10) ,                -- conceptual translation with partial peptide sequence
-         seq-pept-overlap (11) ,    -- sequenced peptide, ordered by overlap
-         seq-pept-homol (12) ,      -- sequenced peptide, ordered by homology
-         concept-trans-a (13) ,     -- conceptual translation supplied by author
-         htgs-1 (14) ,              -- unordered High Throughput sequence contig
-         htgs-2 (15) ,              -- ordered High Throughput sequence contig
-         htgs-3 (16) ,              -- finished High Throughput sequence
-         fli-cdna (17) ,            -- full length insert cDNA
-         htgs-0 (18) ,              -- single genomic reads for coordination
-         htc (19) ,                 -- high throughput cDNA
-         wgs (20) ,                 -- whole genome shotgun sequencing
-         other (255) }              -- explain in techexp (the field declared next)
-         DEFAULT unknown ,
-     techexp VisibleString OPTIONAL ,   -- explanation when tech alone is not enough
-     --
-     -- Most records do not indicate completeness. For genomes, treat the
-     -- sequence as incomplete unless it is specifically marked complete.
-     -- For mRNAs, treat the ends as not exactly known unless the record
-     -- is marked as having the left or right end.
-     --
-     completeness INTEGER {
-         unknown (0) ,
-         complete (1) ,             -- complete biological entity
-         partial (2) ,              -- partial, no further details given
-         no-left (3) ,              -- missing 5' end (na) or amino end (aa)
-         no-right (4) ,             -- missing 3' or COOH end
-         no-ends (5) ,              -- missing both ends
-         has-left (6) ,             -- 5' end (na) or amino end (aa) present
-         has-right (7) ,            -- 3' or COOH end present
-         other (255) } DEFAULT unknown }
- -- GIBB-mol: type of molecule represented.
- -- A WARNING comment in this file says MolInfo will replace this type.
- GIBB-mol ::= ENUMERATED {
-     unknown (0) ,
-     genomic (1) ,
-     pre-mRNA (2) ,         -- any kind of precursor RNA
-     mRNA (3) ,
-     rRNA (4) ,
-     tRNA (5) ,
-     snRNA (6) ,
-     scRNA (7) ,
-     peptide (8) ,
-     other-genetic (9) ,    -- other genetic material
-     genomic-mRNA (10) ,    -- reported mix of genomic and cdna sequence
-     other (255) }
-
- -- GIBB-mod: GenInfo Backbone modifiers.
- -- A WARNING comment in this file says MolInfo will replace this type.
- GIBB-mod ::= ENUMERATED {
-     dna (0) ,
-     rna (1) ,
-     extrachrom (2) ,
-     plasmid (3) ,
-     mitochondrial (4) ,
-     chloroplast (5) ,
-     kinetoplast (6) ,
-     cyanelle (7) ,
-     synthetic (8) ,
-     recombinant (9) ,
-     partial (10) ,
-     complete (11) ,
-     mutagen (12) ,         -- subject of mutagenesis ?
-     natmut (13) ,          -- natural mutant ?
-     transposon (14) ,
-     insertion-seq (15) ,
-     no-left (16) ,         -- left end missing (5' for na, NH2 for aa)
-     no-right (17) ,        -- right end missing (3' or COOH)
-     macronuclear (18) ,
-     proviral (19) ,
-     est (20) ,             -- expressed sequence tag
-     sts (21) ,             -- sequence tagged site
-     survey (22) ,          -- one pass survey sequence
-     chromoplast (23) ,
-     genemap (24) ,         -- a genetic map
-     restmap (25) ,         -- an ordered restriction map
-     physmap (26) ,         -- a physical map (not an ordered restriction map)
-     other (255) }
- -- GIBB-method: sequencing methods.
- -- A WARNING comment in this file says MolInfo will replace this type.
- GIBB-method ::= ENUMERATED {
-     concept-trans (1) ,       -- conceptual translation
-     seq-pept (2) ,            -- the peptide itself was sequenced
-     both (3) ,                -- conceptual translation with partial peptide sequence
-     seq-pept-overlap (4) ,    -- sequenced peptide, ordered by overlap
-     seq-pept-homol (5) ,      -- sequenced peptide, ordered by homology
-     concept-trans-a (6) ,     -- conceptual translation supplied by author
-     other (255) }
-
- -- Numbering: any display numbering system; one of four forms.
- Numbering ::= CHOICE {
-     cont Num-cont ,    -- continuous numbering
-     enum Num-enum ,    -- enumerated names for residues
-     ref  Num-ref ,     -- numbering by reference to another sequence
-     real Num-real }    -- mapping onto a floating point system
-
- -- Num-cont: continuous display numbering system.
- Num-cont ::= SEQUENCE {
-     refnum    INTEGER DEFAULT 1 ,        -- number given to the first residue
-     has-zero  BOOLEAN DEFAULT FALSE ,    -- is 0 used?
-     ascending BOOLEAN DEFAULT TRUE }     -- do the numbers ascend?
- -- Num-enum: arbitrary tags attached to residues.
- Num-enum ::= SEQUENCE {
-     num   INTEGER ,                      -- how many tags follow
-     names SEQUENCE OF VisibleString }    -- the tags themselves
- -- Num-ref: numbering taken by reference to other sequences.
- Num-ref ::= SEQUENCE {
-     type ENUMERATED {          -- kind of reference
-         not-set (0) ,
-         sources (1) ,          -- by segmented or const seq sources
-         aligns (2) } ,         -- by the alignment in the aligns field
-     aligns Seq-align OPTIONAL }
- -- Num-real: maps the integer system used by Bioseq onto a floating
- -- point system, with position = (a * int_position) + b.
- Num-real ::= SEQUENCE {
-     a     REAL ,                     -- multiplier applied to the integer position
-     b     REAL ,                     -- offset added after multiplying
-     units VisibleString OPTIONAL }
- -- Pubdesc: how a sequence was presented in a publication.
- Pubdesc ::= SEQUENCE {
-     pub         Pub-equiv ,                 -- the citation(s)
-     name        VisibleString OPTIONAL ,    -- name used in the paper
-     fig         VisibleString OPTIONAL ,    -- figure in the paper
-     num         Numbering OPTIONAL ,        -- numbering used in the paper
-     numexc      BOOLEAN OPTIONAL ,          -- numbering problem with the paper
-     poly-a      BOOLEAN OPTIONAL ,          -- is a poly A tail indicated in the figure?
-     maploc      VisibleString OPTIONAL ,    -- map location reported in the paper
-     seq-raw     StringStore OPTIONAL ,      -- original sequence from the paper
-     align-group INTEGER OPTIONAL ,          -- this seq is aligned with others in the paper
-     comment     VisibleString OPTIONAL ,    -- any comment on this pub in context
-     reftype INTEGER {                       -- type of reference in a GenBank record
-         seq (0) ,                           -- refers to the sequence
-         sites (1) ,                         -- refers to unspecified features
-         feats (2) ,                         -- refers to specified features
-         no-target (3) }                     -- nothing specified (EMBL)
-         DEFAULT seq }
- -- Heterogen: text naming a cofactor, prosthetic group, inhibitor, etc.
- Heterogen ::= VisibleString
- --*** Instances of sequences *******************************
- --*
- -- Seq-inst: the sequence data itself, together with its representation
- -- class, molecule class, length, topology, strandedness, optional
- -- extension and optional history.
- Seq-inst ::= SEQUENCE {
-     repr ENUMERATED {              -- representation class
-         not-set (0) ,              -- empty
-         virtual (1) ,              -- no seq data
-         raw (2) ,                  -- continuous sequence
-         seg (3) ,                  -- segmented sequence
-         const (4) ,                -- constructed sequence
-         ref (5) ,                  -- reference to another sequence
-         consen (6) ,               -- consensus sequence or pattern
-         map (7) ,                  -- ordered map of any kind
-         delta (8) ,                -- sequence made by changes (delta) to others
-         other (255) } ,
-     mol ENUMERATED {               -- molecule class in living organism (original note: cdna = rna)
-         not-set (0) ,
-         dna (1) ,
-         rna (2) ,
-         aa (3) ,
-         na (4) ,                   -- just a nucleic acid
-         other (255) } ,
-     length INTEGER OPTIONAL ,      -- sequence length in residues
-     fuzz Int-fuzz OPTIONAL ,       -- uncertainty of the length
-     topology ENUMERATED {          -- topology of the molecule
-         not-set (0) ,
-         linear (1) ,
-         circular (2) ,
-         tandem (3) ,               -- some part of a tandem repeat
-         other (255) } DEFAULT linear ,
-     strand ENUMERATED {            -- strandedness in living organism
-         not-set (0) ,
-         ss (1) ,                   -- single strand
-         ds (2) ,                   -- double strand
-         mixed (3) ,
-         other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
-     seq-data Seq-data OPTIONAL ,   -- the sequence
-     ext Seq-ext OPTIONAL ,         -- extensions for special types
-     hist Seq-hist OPTIONAL }       -- sequence history
- --*** Sequence Extensions **********************************
- --* for representing more complex types
- --* const type uses Seq-hist.assembly
- -- Seq-ext: extension data for the more complex sequence types.
- Seq-ext ::= CHOICE {
-     seg   Seg-ext ,      -- segmented sequences
-     ref   Ref-ext ,      -- hot link to another sequence (a view)
-     map   Map-ext ,      -- ordered map of markers
-     delta Delta-ext }    -- sequence of Delta-seq pieces
- -- Payload types for the four Seq-ext alternatives.
- Seg-ext   ::= SEQUENCE OF Seq-loc      -- ordered locations
- Ref-ext   ::= Seq-loc                  -- a single location
- Map-ext   ::= SEQUENCE OF Seq-feat     -- ordered features
- Delta-ext ::= SEQUENCE OF Delta-seq    -- ordered Delta-seq pieces
- -- Delta-seq: one element of a Delta-ext, given either as a location
- -- or as a literal piece of sequence.
- Delta-seq ::= CHOICE {
-     loc     Seq-loc ,        -- points to a sequence
-     literal Seq-literal }    -- a piece of sequence
- -- Seq-literal: a piece of sequence with a mandatory length; the data
- -- itself is optional.
- Seq-literal ::= SEQUENCE {
-     length   INTEGER ,             -- length in residues, must be given
-     fuzz     Int-fuzz OPTIONAL ,   -- the length could be unsure
-     seq-data Seq-data OPTIONAL }   -- the data, when available
- --*** Sequence History Record ***********************************
- --** assembly = records how seq was assembled from others
- --** replaces = records sequences made obsolete by this one
- --** replaced-by = this seq is made obsolete by another(s)
- -- Seq-hist: history record for a sequence. Every field is optional.
- Seq-hist ::= SEQUENCE {
-     assembly    SET OF Seq-align OPTIONAL ,   -- how was this sequence assembled?
-     replaces    Seq-hist-rec OPTIONAL ,       -- sequences made obsolete by this one
-     replaced-by Seq-hist-rec OPTIONAL ,       -- sequences that make this one obsolete
-     deleted CHOICE {
-         bool BOOLEAN ,
-         date Date } OPTIONAL }
- -- Seq-hist-rec: a set of sequence identifiers with an optional date.
- -- Used by the replaces and replaced-by fields of Seq-hist.
- Seq-hist-rec ::= SEQUENCE {
-     date Date OPTIONAL ,
-     ids  SET OF Seq-id }
-
- --*** Various internal sequence representations ************
- --* all are controlled, fixed length forms
- -- Seq-data: the sequence, in exactly one of the encodings below.
- Seq-data ::= CHOICE {
-     iupacna   IUPACna ,      -- IUPAC 1 letter nucleic acid code
-     iupacaa   IUPACaa ,      -- IUPAC 1 letter amino acid code
-     ncbi2na   NCBI2na ,      -- 2 bit nucleic acid code
-     ncbi4na   NCBI4na ,      -- 4 bit nucleic acid code
-     ncbi8na   NCBI8na ,      -- 8 bit extended nucleic acid code
-     ncbipna   NCBIpna ,      -- nucleic acid probabilities
-     ncbi8aa   NCBI8aa ,      -- 8 bit extended amino acid codes
-     ncbieaa   NCBIeaa ,      -- extended ASCII 1 letter amino acid codes
-     ncbipaa   NCBIpaa ,      -- amino acid probabilities
-     ncbistdaa NCBIstdaa }    -- consecutive codes for standard amino acids
- -- Encoding types used by the Seq-data alternatives. The letter codes
- -- are StringStore; the bit, byte and probability codes are OCTET STRING.
- IUPACna   ::= StringStore     -- IUPAC 1 letter codes, no spaces
- IUPACaa   ::= StringStore     -- IUPAC 1 letter codes, no spaces
- NCBI2na   ::= OCTET STRING    -- 00=A, 01=C, 10=G, 11=T
- -- NCBI4na uses 1 bit each for a, g, c, t:
- --   0001=A, 0010=C, 0100=G, 1000=T/U
- --   0101=Purine, 1010=Pyrimidine, etc
- NCBI4na   ::= OCTET STRING
- NCBI8na   ::= OCTET STRING    -- for modified nucleic acids
- -- NCBIpna holds 5 octets per base: probabilities for a, c, g, t, n.
- -- Probabilities are coded 0-255 = 0.0-1.0
- NCBIpna   ::= OCTET STRING
- NCBI8aa   ::= OCTET STRING    -- for modified amino acids
- -- NCBIeaa is the ASCII extended 1 letter amino acid code:
- -- the IUPAC codes + U=selenocysteine
- NCBIeaa   ::= StringStore
- -- NCBIpaa holds 25 octets per amino acid: probabilities for the IUPAC
- -- amino acids in the order A-Y,B,Z,X,(ter),anything.
- -- Probabilities are coded 0-255 = 0.0-1.0
- NCBIpaa   ::= OCTET STRING
- NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
- --*** Sequence Annotation *************************************
- --*
- -- Annot-id: identifier for a Seq-annot, in one of three forms.
- Annot-id ::= CHOICE {
-     local   Object-id ,
-     ncbi    INTEGER ,
-     general Dbtag }
-
- -- Annot-descr: a set of Annotdesc descriptors.
- Annot-descr ::= SET OF Annotdesc
- -- Annotdesc: one descriptor for a collection of annotation; a value
- -- holds exactly one alternative.
- Annotdesc ::= CHOICE {
-     name        VisibleString ,    -- short name for this collection
-     title       VisibleString ,    -- title for this collection
-     comment     VisibleString ,    -- longer free-text comment
-     pub         Pubdesc ,          -- reference to the publication
-     user        User-object ,      -- user defined object
-     create-date Date ,             -- date the entry was first created/released
-     update-date Date ,             -- date of the last update
-     src         Seq-id ,           -- source sequence from which the annot came
-     align       Align-def ,        -- definition of the SeqAligns
-     region      Seq-loc }          -- all contents cover this region
- -- Align-def: describes the class of an alignment Seq-annot.
- Align-def ::= SEQUENCE {
-     align-type INTEGER {               -- class of align Seq-annot
-         ref (1) ,                      -- alignments to the same sequence
-         alt (2) ,                      -- alternate alignments of the same seqs
-         blocks (3) ,                   -- aligned blocks in the same seqs
-         other (255) } ,
-     ids SET OF Seq-id OPTIONAL }       -- for now, used for the one ref seqid
- -- Seq-annot: a collection of annotation. Identification, source
- -- database, name and descriptors are optional; data is mandatory and
- -- holds exactly one kind of content.
- Seq-annot ::= SEQUENCE {
-     id SET OF Annot-id OPTIONAL ,
-     db INTEGER {                       -- source of the annotation
-         genbank (1) ,
-         embl (2) ,
-         ddbj (3) ,
-         pir (4) ,
-         sp (5) ,
-         bbone (6) ,
-         pdb (7) ,
-         other (255) } OPTIONAL ,
-     name VisibleString OPTIONAL ,      -- the source, when db is "other"
-     desc Annot-descr OPTIONAL ,        -- used only for stand alone Seq-annots
-     data CHOICE {
-         ftable SET OF Seq-feat ,
-         align  SET OF Seq-align ,
-         graph  SET OF Seq-graph ,
-         ids    SET OF Seq-id ,         -- used for communication between tools
-         locs   SET OF Seq-loc } }      -- used for communication between tools
- END