生物技术

开发平台：
C/C++

agp_read.cpp：源码内容
							/*
 * ===========================================================================
 * PRODUCTION $Log: agp_read.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:46:12  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: agp_read.cpp,v 1000.1 2004/06/01 19:46:12 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors: Josh Cherry
 *
 * File Description:  Read agp file
 */
#include <ncbi_pch.hpp>
#include <objtools/readers/agp_read.hpp>
#include <objtools/readers/reader_exception.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_literal.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Delta_ext.hpp>
BEGIN_NCBI_SCOPE
USING_SCOPE(objects);
CRef<CBioseq_set> AgpRead(CNcbiIstream& is)
{
    vector<CRef<CSeq_entry> > entries;
    AgpRead(is, entries);
    CRef<CBioseq_set> bioseq_set(new CBioseq_set);
    ITERATE (vector<CRef<CSeq_entry> >, iter, entries) {
        bioseq_set->SetSeq_set().push_back(*iter);
    }
    return bioseq_set;
}
void AgpRead(CNcbiIstream& is, vector<CRef<CSeq_entry> >& entries)
{
    vector<CRef<CBioseq> > bioseqs;
    AgpRead(is, bioseqs);
    NON_CONST_ITERATE (vector<CRef<CBioseq> >, bioseq, bioseqs) {
        CRef<CSeq_entry> entry(new CSeq_entry);
        entry->SetSeq(**bioseq);
        entries.push_back(entry);
    }
}
void AgpRead(CNcbiIstream& is, vector<CRef<CBioseq> >& bioseqs)
{
    string line;
    vector<string> fields;
    string current_object;
    CRef<CBioseq> bioseq;
    CRef<CSeq_inst> seq_inst;
    int last_to;
    int part_num, last_part_num;
    TSeqPos length;
    int line_num = 0;
    while (NcbiGetlineEOL(is, line)) {
        line_num++;
        if (line[0] == '#') {
            // comment line; skip
            continue;
        }
        if (line.find_first_not_of(" tnr") == string::npos) {
            // skip lines containing only white space
            continue;
        }
        fields.clear();
        NStr::Tokenize(line, "t", fields);
        // Number of fields can be 9 or 8, but 8 is valid
        // only if field[4] == "N".
        if (fields.size() != 9) {
            if (fields.size() >= 5 && fields[4] != "N") {
                NCBI_THROW2(CObjReaderParseException, eFormat,
                            string("error at line ") + 
                            NStr::IntToString(line_num) + ": found " +
                            NStr::IntToString(fields.size()) +
                            " columns; there should be 9",
                            is.tellg() - CT_POS_TYPE(0));
            } else if (fields.size() != 8) {
                NCBI_THROW2(CObjReaderParseException, eFormat,
                            string("error at line ") + 
                            NStr::IntToString(line_num) + ": found " +
                            NStr::IntToString(fields.size()) +
                            " columns; there should be 8 or 9",
                            is.tellg() - CT_POS_TYPE(0));
            }
        }
        if (fields[0] != current_object || !bioseq) {
            // close out old one, start a new one
            if (bioseq) {
                seq_inst->SetLength(length);
                bioseq->SetInst(*seq_inst);
                bioseqs.push_back(bioseq);
            }
            current_object = fields[0];
            seq_inst.Reset(new CSeq_inst);
            seq_inst->SetRepr(CSeq_inst::eRepr_delta);
            seq_inst->SetMol(CSeq_inst::eMol_dna);
            bioseq.Reset(new CBioseq);
            CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Local,
                                         current_object, current_object));
            bioseq->SetId().push_back(id);
            last_to = 0;
            last_part_num = 0;
            length = 0;
        }
        // validity checks
        part_num = NStr::StringToInt(fields[3]);
        if (part_num != last_part_num + 1) {
            NCBI_THROW2(CObjReaderParseException, eFormat,
                        string("error at line ") + 
                        NStr::IntToString(line_num) +
                        ": part number out of order",
                        is.tellg() - CT_POS_TYPE(0));
        }
        last_part_num = part_num;
        if (NStr::StringToInt(fields[1]) != last_to + 1) {
            NCBI_THROW2(CObjReaderParseException, eFormat,
                        string("error at line ") + 
                         NStr::IntToString(line_num) +
                         ": begining not equal to previous end + 1",
                         is.tellg() - CT_POS_TYPE(0));
        }
        last_to = NStr::StringToInt(fields[2]);
        // build a Delta-seq, either a Seq-literal (for a gap) or a Seq-loc 
        CRef<CDelta_seq> delta_seq(new CDelta_seq);
        if (fields[4] == "N") {
            // a gap
            TSeqPos gap_len = NStr::StringToInt(fields[5]);
            delta_seq->SetLiteral().SetLength(gap_len);
            length += gap_len;
        } else if (fields[4].size() == 1 && 
                   fields[4].find_first_of("ADFGPOW") == 0) {
            CSeq_loc& loc = delta_seq->SetLoc();
            CRef<CSeq_id> comp_id(new CSeq_id(fields[5]));
            loc.SetInt().SetId(*comp_id);
            loc.SetInt().SetFrom(NStr::StringToInt(fields[6]) - 1);
            loc.SetInt().SetTo  (NStr::StringToInt(fields[7]) - 1);
            length += loc.GetInt().GetTo() - loc.GetInt().GetFrom() + 1;
            if (fields[8] == "+") {
                loc.SetInt().SetStrand(eNa_strand_plus);
            } else if (fields[8] == "-") {
                loc.SetInt().SetStrand(eNa_strand_minus);
            } else if (fields[8] == "0") {
                loc.SetInt().SetStrand(eNa_strand_unknown);
            } else if (fields[8] == "na") {
                loc.SetInt().SetStrand(eNa_strand_other);
            } else {
                NCBI_THROW2(CObjReaderParseException, eFormat,
                            string("error at line ") + 
                            NStr::IntToString(line_num) + ": invalid "
                            "orientation " + fields[8],
                            is.tellg() - CT_POS_TYPE(0));
            }
        } else {
            NCBI_THROW2(CObjReaderParseException, eFormat,
                        string("error at line ") + 
                        NStr::IntToString(line_num) + ": invalid "
                        "component type " + fields[4],
                        is.tellg() - CT_POS_TYPE(0));
        }
        seq_inst->SetExt().SetDelta().Set().push_back(delta_seq);
    }
    // deal with the last one
    if (bioseq) {
        seq_inst->SetLength(length);
        bioseq->SetInst(*seq_inst);
        bioseqs.push_back(bioseq);
    }
}
END_NCBI_SCOPE
/*
 * =====================================================================
 * $Log: agp_read.cpp,v $
 * Revision 1000.1  2004/06/01 19:46:12  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 *
 * Revision 1.6  2004/05/25 20:49:58  jcherry
 * Let the last column of an "N" line be missing, not just empty
 *
 * Revision 1.5  2004/05/21 21:42:55  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.4  2004/02/19 22:57:52  ucko
 * Accommodate stricter implementations of CT_POS_TYPE.
 *
 * Revision 1.3  2004/01/05 23:01:37  jcherry
 * Support unknown ("0") or irrelevant ("na") strand designation
 *
 * Revision 1.2  2003/12/08 23:39:20  jcherry
 * Set length of Seq-inst
 *
 * Revision 1.1  2003/12/08 15:49:32  jcherry
 * Initial version
 *
 * =====================================================================
 */