生物技术

开发平台：
C/C++

fasta.hpp：源码内容
							/*
 * ===========================================================================
 * PRODUCTION $Log: fasta.hpp,v $
 * PRODUCTION Revision 1000.1  2004/04/12 17:35:07  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.5
 * PRODUCTION
 * ===========================================================================
 */
#ifndef OBJTOOLS_READERS___FASTA__HPP
#define OBJTOOLS_READERS___FASTA__HPP
/*  $Id: fasta.hpp,v 1000.1 2004/04/12 17:35:07 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Authors:  Aaron Ucko, NCBI;  Anatoliy Kuznetsov, NCBI.
*
* File Description:
*   Reader for FASTA-format sequences.  (The writer is CFastaOstream, in
*   <objmgr/util/sequence.hpp>.)
*
*/
#include <objects/seqset/Seq_entry.hpp>
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
// Option bits understood by ReadFasta.  Each enumerator occupies its own
// bit, so any subset can be combined with binary OR into a TReadFastaFlags.
enum EReadFastaFlags {
    // Sequence type to use if no revealing accession is found:
    fReadFasta_AssumeNuc  = 1 << 0,
    fReadFasta_AssumeProt = 1 << 1,
    // Force the type regardless of the accession.
    fReadFasta_ForceType  = 1 << 2,
    // Treat the name as a local ID regardless of any '|' characters.
    fReadFasta_NoParseID  = 1 << 3,
    // Make a delta sequence if gaps are found.
    fReadFasta_ParseGaps  = 1 << 4,
    // Just read the first sequence found.
    fReadFasta_OneSeq     = 1 << 5,
    // Read Seq-ids past the first control-A (see the note below).
    fReadFasta_AllSeqIds  = 1 << 6,
    // Parse the deflines but skip the sequence data.
    fReadFasta_NoSeqData  = 1 << 7
};

// Binary OR of EReadFastaFlags values.
typedef int TReadFastaFlags;

// Note on fReadFasta_AllSeqIds: some databases (notably nr) have merged
// identical sequences, stringing their deflines together with control-A
// characters.  Normally the reader stops at the first control-A; with this
// flag set it parses all of the IDs instead.
// Reads FASTA-format sequence data from the stream "in" and returns the
// result as a Seq-entry.  Keeps going until EOF or a parse error
// (-> CParseException) unless fReadFasta_OneSeq is set.
//
//   in      - input stream to read from
//   flags   - binary OR of EReadFastaFlags values (default: no flags)
//   counter - optional (may be 0); per this file's revision log, a counter
//             for assigning local IDs to sequences with no ID given
//   lcv     - optional (may be 0); per this file's revision log, reports
//             lowercase locations in a vector with one entry per Bioseq
//
// See also CFastaOstream in <objmgr/util/sequence.hpp> (-lxobjutil).
NCBI_XOBJREAD_EXPORT
CRef<CSeq_entry> ReadFasta(CNcbiIstream& in, TReadFastaFlags flags = 0,
                           int* counter = 0,
                           vector<CConstRef<CSeq_loc> >* lcv = 0);
//////////////////////////////////////////////////////////////////
//
// SFastaFileMap - description of a multi-entry FASTA file: keeps the list
// of offsets of all molecules in the file.
//
struct SFastaFileMap
{
    // One record per molecule entry in the file.
    struct SFastaEntry
    {
        // Give the offset a well-defined starting value: without this
        // constructor a default-initialized entry ("SFastaEntry e;") would
        // carry an indeterminate stream_offset.  The string members
        // construct themselves empty.
        SFastaEntry() : stream_offset(0) {}

        string  seq_id;        // Sequence Id
        string  description;   // Molecule description
        size_t  stream_offset; // Molecule offset in file
    };

    typedef vector<SFastaEntry>  TMapVector;

    TMapVector   file_map; // vector keeps list of all molecule entries
};
// Reads the input stream (assumed to be in FASTA format) one molecule entry
// after another, filling the map structure that describes and points to the
// molecule entries.  The FASTA map can be used later for quick CSeq_entry
// retrieval.
//
//   fasta_map - map structure to fill
//   input     - input file stream to read the entries from
void NCBI_XOBJREAD_EXPORT ReadFastaFileMap(SFastaFileMap* fasta_map, 
                                           CNcbiIfstream& input);
END_SCOPE(objects)
END_NCBI_SCOPE
/*
* ===========================================================================
*
* $Log: fasta.hpp,v $
* Revision 1000.1  2004/04/12 17:35:07  gouriano
* PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.5
*
* Revision 1.5  2004/01/20 16:27:53  ucko
* Fix a stray reference to sequence.hpp's old location.
*
* Revision 1.4  2003/08/08 21:31:37  dondosha
* Changed type of lcase_mask in ReadFasta to vector of CConstRefs
*
* Revision 1.3  2003/08/07 21:12:56  ucko
* Support a counter for assigning local IDs to sequences with no ID given.
*
* Revision 1.2  2003/08/06 19:08:28  ucko
* Slight interface tweak to ReadFasta: report lowercase locations in a
* vector with one entry per Bioseq rather than a consolidated Seq_loc_mix.
*
* Revision 1.1  2003/06/04 17:26:08  ucko
* Split out from Seq_entry.hpp.
*
*
* ===========================================================================
*/
#endif  /* OBJTOOLS_READERS___FASTA__HPP */