生物技术

开发平台：

C/C++

seqport_util.cpp：源码内容

// Allocate memory for out_seq
out_seq_data.resize(uLength);
// Get iterator for out_seq_data
string::iterator i_out = out_seq_data.begin();
// Declare iterator for in_seq_data and determine begin and end
vector<char>::const_iterator i_in;
vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
vector<char>::const_iterator i_in_end = i_in_begin + uLength;
// Loop through input and convert to output
for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
*(i_out++) =
m_NcbistdaaNcbieaa->m_Table[static_cast<unsigned char>(*i_in)];
return uLength;
}
// Function to convert ncbistdaa (byte) to iupacaa (byte).
// Resets out_seq, selects its iupacaa variant, and fills it with the
// m_NcbistdaaIupacaa->m_Table translation of each residue in the requested
// range of in_seq.
//   in_seq    - source sequence; read through GetNcbistdaa()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index of the first residue to convert
//   uLength   - number of residues to convert; clipped by Adjust()
// Returns the number of residues converted, or 0 when uBeginIdx is at or
// beyond the end of in_seq (out_seq is then left as an empty iupacaa).
// NOTE(review): a block-comment terminator follows this function, so it
// appears to live inside a commented-out region - confirm before relying
// on it.
TSeqPos CSeqportUtil_implementation::MapNcbistdaaToIupacaa
(const CSeq_data& in_seq,
CSeq_data* out_seq,
TSeqPos uBeginIdx,
TSeqPos uLength)
const
{
// Get read-only reference to in_seq data
const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
// Get read & write reference to out_seq data
// (Reset() runs before the size check, so out_seq is cleared even on the
// early return below)
out_seq->Reset();
string& out_seq_data = out_seq->SetIupacaa().Set();
// If uBeginIdx beyond end of in_seq, return
if(uBeginIdx >= in_seq_data.size())
return 0;
// Adjust uBeginIdx and uLength
// (one residue per byte on both sides, hence the 1, 1 arguments)
Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
// Allocate memory for out_seq
out_seq_data.resize(uLength);
// Get iterator for out_seq_data
string::iterator i_out = out_seq_data.begin();
// Declare iterator for in_seq_data and determine begin and end
vector<char>::const_iterator i_in;
vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
vector<char>::const_iterator i_in_end = i_in_begin + uLength;
// Loop through input and convert to output
// (the cast to unsigned char keeps the table index non-negative)
for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
(*(i_out++)) =
m_NcbistdaaIupacaa->m_Table[static_cast<unsigned char>(*i_in)];
return uLength;
}
*/
// Fast validation of iupacna sequence.
// ORs together the m_Iupacna->m_Table entry of every residue in the
// requested range and reports the range as valid unless the accumulated
// value is 255 (the same value ValidateIupacna compares table entries
// against to flag a bad residue).
//   in_seq    - sequence to check; read through GetIupacna()
//   uBeginIdx - index of the first residue to check
//   uLength   - number of residues to check; passed through Adjust(), which
//               treats 0 as "to the end" and clips the range to the sequence
// Returns true when uBeginIdx is at or beyond the end of in_seq (nothing to
// check) or when the accumulated value differs from 255; false otherwise.
bool CSeqportUtil_implementation::FastValidateIupacna
(const CSeq_data& in_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const string& in_seq_data = in_seq.GetIupacna().Get();

    // Check that uBeginIdx is not beyond end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return true;

    // Adjust uBeginIdx, uLength
    Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);

    // Determine begin and end of the range to scan
    const string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
    const string::const_iterator e_itor = b_itor + uLength;

    // Perform Fast Validation.
    // The accumulator must start with no bits set. It was previously seeded
    // with the multi-character literal 'x00' (a hex escape that lost its
    // backslash), which has an implementation-defined non-zero value.
    unsigned char ch = 0x00;
    for (string::const_iterator itor = b_itor; itor != e_itor; ++itor)
        ch |= m_Iupacna->m_Table[static_cast<unsigned char>(*itor)];

    // Return true if valid, otherwise false
    return (ch != 255);
}
// Fast validation of ncbieaa sequence.
// ORs together the m_Ncbieaa->m_Table entry of every residue in the
// requested range and reports the range as valid unless the accumulated
// value is 255 (the same value ValidateNcbieaa compares table entries
// against to flag a bad residue).
//   in_seq    - sequence to check; read through GetNcbieaa()
//   uBeginIdx - index of the first residue to check
//   uLength   - number of residues to check; passed through Adjust(), which
//               treats 0 as "to the end" and clips the range to the sequence
// Returns true when uBeginIdx is at or beyond the end of in_seq (nothing to
// check) or when the accumulated value differs from 255; false otherwise.
bool CSeqportUtil_implementation::FastValidateNcbieaa
(const CSeq_data& in_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const string& in_seq_data = in_seq.GetNcbieaa().Get();

    // Check that uBeginIdx is not beyond end of in_seq
    // (this test used to be written twice in a row; once is enough)
    if (uBeginIdx >= in_seq_data.size())
        return true;

    // Adjust uBeginIdx, uLength
    Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);

    // Determine begin and end of the range to scan
    const string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
    const string::const_iterator e_itor = b_itor + uLength;

    // Perform Fast Validation.
    // The accumulator must start with no bits set. It was previously seeded
    // with the multi-character literal 'x00' (a hex escape that lost its
    // backslash), which has an implementation-defined non-zero value.
    unsigned char ch = 0x00;
    for (string::const_iterator itor = b_itor; itor != e_itor; ++itor)
        ch |= m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)];

    // Return true if valid, otherwise false
    return (ch != 255);
}
// Fast validation of ncbistdaa sequence.
// ORs together the m_Ncbistdaa->m_Table entry of every residue in the
// requested range and reports the range as valid unless the accumulated
// value is 255 (the same value ValidateNcbistdaa compares table entries
// against to flag a bad residue).
//   in_seq    - sequence to check; read through GetNcbistdaa()
//   uBeginIdx - index of the first residue to check
//   uLength   - number of residues to check; passed through Adjust(), which
//               treats 0 as "to the end" and clips the range to the sequence
// Returns true when uBeginIdx is at or beyond the end of in_seq (nothing to
// check) or when the accumulated value differs from 255; false otherwise.
bool CSeqportUtil_implementation::FastValidateNcbistdaa
(const CSeq_data& in_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();

    // Check that uBeginIdx is not beyond end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return true;

    // Adjust uBeginIdx, uLength
    Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);

    // Determine begin and end of the range to scan
    const vector<char>::const_iterator b_itor =
        in_seq_data.begin() + uBeginIdx;
    const vector<char>::const_iterator e_itor = b_itor + uLength;

    // Perform Fast Validation.
    // The accumulator must start with no bits set. It was previously seeded
    // with the multi-character literal 'x00' (a hex escape that lost its
    // backslash), which has an implementation-defined non-zero value.
    unsigned char ch = 0x00;
    for (vector<char>::const_iterator itor = b_itor; itor != e_itor; ++itor)
        ch |= m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)];

    // Return true if valid, otherwise false
    return (ch != 255);
}
// Fast validation of iupacaa sequence.
// ORs together the m_Iupacaa->m_Table entry of every residue in the
// requested range and reports the range as valid unless the accumulated
// value is 255 (the same value ValidateIupacaa compares table entries
// against to flag a bad residue).
//   in_seq    - sequence to check; read through GetIupacaa()
//   uBeginIdx - index of the first residue to check
//   uLength   - number of residues to check; passed through Adjust(), which
//               treats 0 as "to the end" and clips the range to the sequence
// Returns true when uBeginIdx is at or beyond the end of in_seq (nothing to
// check) or when the accumulated value differs from 255; false otherwise.
bool CSeqportUtil_implementation::FastValidateIupacaa
(const CSeq_data& in_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const string& in_seq_data = in_seq.GetIupacaa().Get();

    // Check that uBeginIdx is not beyond end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return true;

    // Adjust uBeginIdx, uLength
    Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);

    // Determine begin and end of the range to scan
    const string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
    const string::const_iterator e_itor = b_itor + uLength;

    // Perform Fast Validation.
    // The accumulator must start with no bits set. It was previously seeded
    // with the multi-character literal 'x00' (a hex escape that lost its
    // backslash), which has an implementation-defined non-zero value.
    unsigned char ch = 0x00;
    for (string::const_iterator itor = b_itor; itor != e_itor; ++itor)
        ch |= m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)];

    // Return true if valid, otherwise false
    return (ch != 255);
}
void CSeqportUtil_implementation::ValidateIupacna
(const CSeq_data& in_seq,
vector<TSeqPos>* badIdx,
TSeqPos uBeginIdx,
TSeqPos uLength)
const
{
// Get read-only reference to in_seq data
const string& in_seq_data = in_seq.GetIupacna().Get();
// clear out_indices
badIdx->clear();
// Check that uBeginIdx is not beyond end of in_seq
if(uBeginIdx >= in_seq_data.size())
return;
// Adjust uBeginIdx, uLength
Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
// Declare in iterator on in_seq and determine begin and end
string::const_iterator itor;
string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
string::const_iterator e_itor = b_itor + uLength;
// Perform Validation
TSeqPos nIdx = uBeginIdx;
for(itor = b_itor; itor != e_itor; ++itor)
if(m_Iupacna->m_Table[static_cast<unsigned char>(*itor)] == char(255))
badIdx->push_back(nIdx++);
else
nIdx++;
// Return list of bad indices
return;
}
void CSeqportUtil_implementation::ValidateNcbieaa
(const CSeq_data& in_seq,
vector<TSeqPos>* badIdx,
TSeqPos uBeginIdx,
TSeqPos uLength)
const
{
// Get read-only reference to in_seq data
const string& in_seq_data = in_seq.GetNcbieaa().Get();
// clear badIdx
badIdx->clear();
// Check that uBeginIdx is not beyond end of in_seq
if(uBeginIdx >= in_seq_data.size())
return;
// Adjust uBeginIdx, uLength
Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
// Declare in iterator on in_seq and determine begin and end
string::const_iterator itor;
string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
string::const_iterator e_itor = b_itor + uLength;
// Perform Validation
TSeqPos nIdx = uBeginIdx;
for(itor = b_itor; itor != e_itor; ++itor)
if(m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
badIdx->push_back(nIdx++);
else
nIdx++;
// Return vector of bad indices
return;
}
void CSeqportUtil_implementation::ValidateNcbistdaa
(const CSeq_data& in_seq,
vector<TSeqPos>* badIdx,
TSeqPos uBeginIdx,
TSeqPos uLength)
const
{
// Get read-only reference to in_seq data
const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
// Create a vector to return
badIdx->clear();
// Check that uBeginIdx is not beyond end of in_seq
if(uBeginIdx >= in_seq_data.size())
return;
// Adjust uBeginIdx, uLength
Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
// Declare in iterator on in_seq and determine begin and end
vector<char>::const_iterator itor;
vector<char>::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
vector<char>::const_iterator e_itor = b_itor + uLength;
// Perform Validation
TSeqPos nIdx = uBeginIdx;
for(itor=b_itor; itor!=e_itor; ++itor)
if(m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)]==char(255))
badIdx->push_back(nIdx++);
else
nIdx++;
// Return vector of bad indices
return;
}
void CSeqportUtil_implementation::ValidateIupacaa
(const CSeq_data& in_seq,
vector<TSeqPos>* badIdx,
TSeqPos uBeginIdx,
TSeqPos uLength)
const
{
// Get read-only reference to in_seq data
const string& in_seq_data = in_seq.GetIupacaa().Get();
// Create a vector to return
badIdx->clear();
// Check that uBeginIdx is not beyond end of in_seq
if(uBeginIdx >= in_seq_data.size())
return;
// Adjust uBeginIdx, uLength
Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
// Declare in iterator on in_seq and determine begin and end
string::const_iterator itor;
string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
string::const_iterator e_itor = b_itor + uLength;
// Perform Validation
TSeqPos nIdx = uBeginIdx;
for(itor=b_itor; itor!=e_itor; ++itor)
if(m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
badIdx->push_back(nIdx++);
else
nIdx++;
// Return vector of bad indices
return;
}
// Function to make copy of ncbi2na type sequences (4 residues per byte).
// Resets out_seq, selects its ncbi2na variant, and fills it with the
// requested residue range of in_seq, re-packed so that residue uBeginIdx
// becomes the first residue of the output.
//   in_seq    - source sequence; read through GetNcbi2na()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index (in residues) of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetNcbi2naCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();

    // Get reference to in_seq data
    const vector<char>& in_seq_data = in_seq.GetNcbi2na().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= 4 * in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const SIZE_TYPE uAvailable = 4 * in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Number of bytes needed to hold uLength residues (rounded up)
    SIZE_TYPE uNumBytes = uLength/4;
    if ((uLength % 4) != 0)
        ++uNumBytes;

    // Allocate memory for out_seq data
    out_seq_data.resize(uNumBytes);

    // Output cursor.  It starts AT begin() and is advanced after each write;
    // forming begin() - 1 and pre-incrementing is undefined behaviour.
    vector<char>::iterator i_out = out_seq_data.begin();

    // Calculate amounts to shift bits
    const unsigned int lShift = 2*(uBeginIdx % 4);
    const unsigned int rShift = 8 - lShift;

    // Input cursor, positioned on the byte that holds residue uBeginIdx
    vector<char>::const_iterator i_in = in_seq_data.begin() + uBeginIdx/4;

    // Prevent access beyond end of in_seq_data: when the range reaches the
    // last input byte, that byte is handled separately because it has no
    // successor to borrow bits from.
    bool bDoLastByte = false;
    if ((uBeginIdx/4 + uNumBytes) >= in_seq_data.size()) {
        uNumBytes = in_seq_data.size() - uBeginIdx/4 - 1;
        bDoLastByte = true;
    }
    const vector<char>::const_iterator i_in_end = i_in + uNumBytes;

    // Loop through input sequence and copy to output sequence
    if (lShift > 0) {
        // Each output byte is the tail of one input byte followed by the
        // head of the next; "& 255" strips sign extension before the shift.
        for ( ; i_in != i_in_end; ++i_in, ++i_out)
            (*i_out) =
                ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
    } else {
        // Byte aligned: plain copy
        for ( ; i_in != i_in_end; ++i_in, ++i_out)
            (*i_out) = (*i_in);
    }

    // Handle last input byte if necessary
    if (bDoLastByte)
        (*i_out) = (*i_in) << lShift;

    return uLength;
}
// Function to make copy of ncbi4na type sequences (2 residues per byte).
// Resets out_seq, selects its ncbi4na variant, and fills it with the
// requested residue range of in_seq, re-packed so that residue uBeginIdx
// becomes the first residue of the output.
//   in_seq    - source sequence; read through GetNcbi4na()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index (in residues) of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetNcbi4naCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();

    // Get reference to in_seq data
    const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= 2 * in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const SIZE_TYPE uAvailable = 2 * in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Number of bytes needed to hold uLength residues (rounded up)
    SIZE_TYPE uNumBytes = uLength/2;
    if ((uLength % 2) != 0)
        ++uNumBytes;

    // Allocate memory for out_seq data
    out_seq_data.resize(uNumBytes);

    // Output cursor.  It starts AT begin() and is advanced after each write;
    // forming begin() - 1 and pre-incrementing is undefined behaviour.
    vector<char>::iterator i_out = out_seq_data.begin();

    // Calculate amounts to shift bits
    const unsigned int lShift = 4*(uBeginIdx % 2);
    const unsigned int rShift = 8 - lShift;

    // Input cursor, positioned on the byte that holds residue uBeginIdx
    vector<char>::const_iterator i_in = in_seq_data.begin() + uBeginIdx/2;

    // Prevent access beyond end of in_seq_data: when the range reaches the
    // last input byte, that byte is handled separately because it has no
    // successor to borrow bits from.
    bool bDoLastByte = false;
    if ((uBeginIdx/2 + uNumBytes) >= in_seq_data.size()) {
        uNumBytes = in_seq_data.size() - uBeginIdx/2 - 1;
        bDoLastByte = true;
    }
    const vector<char>::const_iterator i_in_end = i_in + uNumBytes;

    // Loop through input sequence and copy to output sequence
    if (lShift > 0) {
        // Each output byte is the low nibble of one input byte followed by
        // the high nibble of the next; "& 255" strips sign extension.
        for ( ; i_in != i_in_end; ++i_in, ++i_out)
            (*i_out) =
                ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
    } else {
        // Byte aligned: plain copy
        for ( ; i_in != i_in_end; ++i_in, ++i_out)
            (*i_out) = (*i_in);
    }

    // Handle last input byte
    if (bDoLastByte)
        (*i_out) = (*i_in) << lShift;

    return uLength;
}
// Function to make copy of iupacna type sequences.
// Resets out_seq, selects its iupacna variant, and fills it with the
// requested residue range of in_seq.
//   in_seq    - source sequence; read through GetIupacna()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetIupacnaCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    string& out_seq_data = out_seq->SetIupacna().Set();

    // Get reference to in_seq data
    const string& in_seq_data = in_seq.GetIupacna().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Copy the range in one step.  This replaces a hand-written loop whose
    // output iterator started at begin() - 1, which is undefined behaviour.
    out_seq_data.assign(in_seq_data, uBeginIdx, uLength);

    return uLength;
}
// Function to make copy of ncbieaa type sequences.
// Resets out_seq, selects its ncbieaa variant, and fills it with the
// requested residue range of in_seq.
//   in_seq    - source sequence; read through GetNcbieaa()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetNcbieaaCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    string& out_seq_data = out_seq->SetNcbieaa().Set();

    // Get reference to in_seq data
    const string& in_seq_data = in_seq.GetNcbieaa().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Copy the range in one step.  This replaces a hand-written loop whose
    // output iterator started at begin() - 1, which is undefined behaviour.
    out_seq_data.assign(in_seq_data, uBeginIdx, uLength);

    return uLength;
}
// Function to make copy of ncbistdaa type sequences.
// Resets out_seq, selects its ncbistdaa variant, and fills it with the
// requested residue range of in_seq.
//   in_seq    - source sequence; read through GetNcbistdaa()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetNcbistdaaCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();

    // Get reference to in_seq data
    const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const vector<char>::size_type uAvailable =
        in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Copy the range in one step.  This replaces a hand-written loop whose
    // output iterator started at begin() - 1, which is undefined behaviour.
    const vector<char>::const_iterator i_in_begin =
        in_seq_data.begin() + uBeginIdx;
    out_seq_data.assign(i_in_begin, i_in_begin + uLength);

    return uLength;
}
// Function to make copy of iupacaa type sequences.
// Resets out_seq, selects its iupacaa variant, and fills it with the
// requested residue range of in_seq.
//   in_seq    - source sequence; read through GetIupacaa()
//   out_seq   - destination; any previous content is discarded by Reset()
//   uBeginIdx - index of the first residue to copy
//   uLength   - number of residues to copy; 0 or a value reaching past the
//               end of in_seq means "up to the end of in_seq"
// Returns the number of residues copied, or 0 when uBeginIdx is at or
// beyond the end of in_seq.
TSeqPos CSeqportUtil_implementation::GetIupacaaCopy
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get reference to out_seq data
    out_seq->Reset();
    string& out_seq_data = out_seq->SetIupacaa().Set();

    // Get reference to in_seq data
    const string& in_seq_data = in_seq.GetIupacaa().Get();

    // Return if uBeginIdx is after end of in_seq
    if (uBeginIdx >= in_seq_data.size())
        return 0;

    // Set uLength to actual valid length in out_seq.  The comparison is made
    // against the number of residues remaining so that a very large uLength
    // cannot wrap around in uBeginIdx + uLength and slip past the clamp.
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Copy the range in one step.  This replaces a hand-written loop whose
    // output iterator started at begin() - 1, which is undefined behaviour.
    out_seq_data.assign(in_seq_data, uBeginIdx, uLength);

    return uLength;
}
// Normalises a requested residue range in place.
// On return *uBeginIdx is a multiple of uInSeqsPerByte (i.e. it sits on an
// input byte boundary) and *uLength is a multiple of uOutSeqsPerByte that
// does not run past the end of the input.
//   uBeginIdx       - in/out: first residue of the range
//   uLength         - in/out: residue count; 0 on entry means "whole input"
//   uInSeqBytes     - number of bytes in the input sequence
//   uInSeqsPerByte  - residues packed into one input byte
//   uOutSeqsPerByte - residues packed into one output byte
// Returns the overhang: the residues cut off *uLength to reach a multiple
// of uOutSeqsPerByte.
TSeqPos CSeqportUtil_implementation::Adjust
(TSeqPos* uBeginIdx,
 TSeqPos* uLength,
 TSeqPos  uInSeqBytes,
 TSeqPos  uInSeqsPerByte,
 TSeqPos  uOutSeqsPerByte)
    const
{
    TSeqPos& first = *uBeginIdx;
    TSeqPos& count = *uLength;

    // Total number of residues the input can hold
    const TSeqPos uTotal = uInSeqsPerByte * uInSeqBytes;

    // A zero length is shorthand for "through the end of the input"
    if (count == 0) {
        count = uTotal;
    }

    // A start at or past the end is pulled back to the last input byte
    if (first >= uTotal) {
        first = uTotal - uInSeqsPerByte;
    }

    // Move the start down to its byte boundary; the length grows by the
    // same amount so the range still ends at the same residue
    count += first % uInSeqsPerByte;
    first -= first % uInSeqsPerByte;

    // Never reach beyond the end of the input
    const TSeqPos uRemaining = uTotal - first;
    if (count > uRemaining) {
        count = uRemaining;
    }

    // Trim the length to whole output bytes; the remainder is the overhang
    const TSeqPos uOverhang = count % uOutSeqsPerByte;
    count -= uOverhang;
    return uOverhang;
}
// Loops through an ncbi4na input sequence and determines the ambiguities
// that would result from conversion to an ncbi2na sequence.
//   in_seq      - source sequence; read through GetNcbi4na()
//   out_seq     - reset, then filled (as ncbi4na) with the bases that
//                 become ambiguous, packed two per byte
//   out_indices - receives the indices in in_seq of those bases
//   uBeginIdx   - index (in residues) of the first base to examine
//   uLength     - number of bases to examine; 0 or a value reaching past
//                 the end of in_seq means "up to the end of in_seq"
// Returns the number of ambiguous bases found.  When uBeginIdx is at or
// beyond the end of in_seq the function returns 0 immediately, leaving
// out_seq reset and out_indices untouched.
//
// Whether a nibble is ambiguous is read from m_DetectAmbigNcbi4naNcbi2na:
// 1 = low nibble, 2 = high nibble, 3 = both; any other value = neither.
TSeqPos CSeqportUtil_implementation::GetAmbigs_ncbi4na_ncbi2na
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 vector<TSeqPos>* out_indices,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();

    // Get read & write reference to out_seq data
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();

    // Adjust uBeginIdx and uLength, if necessary.  The clamp compares
    // against the residues remaining so uBeginIdx + uLength cannot wrap.
    if (uBeginIdx >= 2*in_seq_data.size())
        return 0;
    const vector<char>::size_type uAvailable =
        2*in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Save the requested range, then widen it to start on a byte boundary
    const TSeqPos uBeginSav = uBeginIdx;
    const TSeqPos uLenSav   = uLength;
    uLength += uBeginIdx % 2;
    uBeginIdx = 2*(uBeginIdx/2);

    // Number of input bytes that will be examined
    const vector<char>::size_type uInBytes = uLength/2 + (uLength % 2);

    // Allocate memory for out_seq_data and out_indices
    // Note, these will be shrunk at the end to correspond
    // to actual memory needed.  Note, in test cases, over 50% of the
    // time spent in this method is spent in the next two
    // statements and 3/4 of that is spent in the second statement.
    // Every examined byte can contribute two indices, so room is needed for
    // 2*uInBytes entries; sizing to uLength was one short for odd uLength
    // and let the loop write past the end of out_indices.
    out_seq_data.resize(uInBytes);
    out_indices->resize(2*uInBytes);

    // Variable to track number of ambigs
    TSeqPos uNumAmbigs = 0;

    // Get iterators to input sequence
    vector<char>::const_iterator i_in;
    const vector<char>::const_iterator i_in_begin =
        in_seq_data.begin() + uBeginIdx/2;
    const vector<char>::const_iterator i_in_end = i_in_begin + uInBytes;

    // Get iterators to out_seq_data and out_indices
    vector<char>::iterator    i_out_seq = out_seq_data.begin();
    vector<TSeqPos>::iterator i_out_idx = out_indices->begin();

    // Index of current input seq base
    TSeqPos uIdx = uBeginIdx;

    // Loop through input sequence looking for ambiguities.
    // An odd uNumAmbigs means the current output byte already holds a high
    // nibble, so the next ambiguous base goes into its low nibble.
    // The nibble masks are written as 0x0f / 0xf0; they used to be the
    // multi-character literals 'x0f' / 'xf0' (hex escapes that had lost
    // their backslash), which do not have the intended values.
    for (i_in = i_in_begin; i_in != i_in_end; ++i_in) {
        switch (m_DetectAmbigNcbi4naNcbi2na->m_Table
                [static_cast<unsigned char>(*i_in)]) {

        case 1: // Low order input nible ambiguous
            if (uNumAmbigs & 1) {
                // Put low order input nible in low order output nible
                (*i_out_seq) |= (*i_in) & 0x0f;
                ++i_out_seq;
            } else {
                // Put low order input nible in high order output nible
                (*i_out_seq) = (*i_in) << 4;
            }
            // Record input index that was ambiguous
            (*i_out_idx) = uIdx + 1;
            ++i_out_idx;
            // Increment number of ambiguities
            uNumAmbigs++;
            break;

        case 2: // High order input nible ambiguous
            if (uNumAmbigs & 1) {
                // Put high order input nible in low order output nible
                (*i_out_seq) |= ((*i_in) >> 4) & 0x0f;
                ++i_out_seq;
            } else {
                // Put high order input nible in high order output nible
                (*i_out_seq) = (*i_in) & 0xf0;
            }
            // Record input index that was ambiguous
            (*i_out_idx) = uIdx;
            ++i_out_idx;
            // Increment number of ambiguities
            uNumAmbigs++;
            break;

        case 3: // Both input nibles ambiguous
            if (uNumAmbigs & 1) {
                // Put high order input nible in low order
                // output nible, move to the next output byte
                // and put the low order input nibble in the
                // high order output nible.
                (*i_out_seq) |= ((*i_in) >> 4) & 0x0f;
                (*(++i_out_seq)) = (*i_in) << 4;
            } else {
                // Put high order input nible in high order
                // output nible, put low order input nible
                // in low order output nible, and move to
                // next output byte
                (*i_out_seq) = (*i_in);
                ++i_out_seq;
            }
            // Record indices that were ambiguous
            (*i_out_idx) = uIdx;
            (*(++i_out_idx)) = uIdx + 1;
            ++i_out_idx;
            // Increment the number of ambiguities
            uNumAmbigs += 2;
            break;
        }

        // Advance the residue index to the next input byte.
        uIdx += 2;
    }

    // Shrink out_seq_data and out_indices to actual sizes needed
    out_indices->resize(uNumAmbigs);
    out_seq_data.resize(uNumAmbigs/2 + uNumAmbigs % 2);

    // Check to ensure that ambigs outside of requested range are not
    // included.  Widening the range to byte boundaries can pull in at most
    // one extra base at each end.  Both tests are guarded against an empty
    // index list, which used to be indexed unconditionally.
    TSeqPos uKeepBeg = 0;
    bool    bTrimmed = false;
    if (!out_indices->empty()  &&  out_indices->front() < uBeginSav) {
        uKeepBeg = 1;
        out_indices->erase(out_indices->begin());
        bTrimmed = true;
    }
    if (!out_indices->empty()  &&
        out_indices->back() >= uBeginSav + uLenSav) {
        out_indices->pop_back();
        bTrimmed = true;
    }

    if (bTrimmed) {
        if (out_indices->empty()) {
            // Every ambiguity found lay outside the requested range
            out_seq_data.clear();
            uNumAmbigs = 0;
        } else {
            // Always pass the exact number of bases to keep.  A length of 0
            // means "keep to the end of the last byte" to KeepNcbi4na, which
            // can be one nibble more than the number of indices that remain.
            uNumAmbigs = KeepNcbi4na
                (out_seq, uKeepBeg,
                 static_cast<TSeqPos>(out_indices->size()));
        }
    }

    return uNumAmbigs;
}
// Loops through an iupacna input sequence and determines
// the ambiguities that would result from conversion to an ncbi2na sequence.
//   in_seq      - source sequence; read through GetIupacna()
//   out_seq     - reset, then filled (as iupacna) with the bases that
//                 become ambiguous
//   out_indices - receives the indices in in_seq of those bases
//   uBeginIdx   - index of the first base to examine
//   uLength     - number of bases to examine; 0 or a value reaching past
//                 the end of in_seq means "up to the end of in_seq"
// Returns the number of ambiguities found.  When uBeginIdx is at or beyond
// the end of in_seq the function returns 0 immediately, leaving out_seq
// reset and out_indices untouched.
//
// A base counts as ambiguous when its m_DetectAmbigIupacnaNcbi2na->m_Table
// entry equals 1.
TSeqPos CSeqportUtil_implementation::GetAmbigs_iupacna_ncbi2na
(const CSeq_data& in_seq,
 CSeq_data*       out_seq,
 vector<TSeqPos>* out_indices,
 TSeqPos          uBeginIdx,
 TSeqPos          uLength)
    const
{
    // Get read-only reference to in_seq data
    const string& in_seq_data = in_seq.GetIupacna().Get();

    // Get read & write reference to out_seq data
    out_seq->Reset();
    string& out_seq_data = out_seq->SetIupacna().Set();

    // Validate/adjust uBeginIdx and uLength.  The clamp compares against
    // the number of bases remaining so that a very large uLength cannot
    // wrap around in uBeginIdx + uLength and slip past it.
    if (uBeginIdx >= in_seq_data.size())
        return 0;
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // Allocate memory for out_seq_data and out_indices
    // Note, these will be shrunk at the end to correspond
    // to actual memory needed.
    out_seq_data.resize(uLength);
    out_indices->resize(uLength);

    // Variable to track number of ambigs
    TSeqPos uNumAmbigs = 0;

    // Get iterators to input sequence
    const string::const_iterator i_in_begin =
        in_seq_data.begin() + uBeginIdx;
    const string::const_iterator i_in_end = i_in_begin + uLength;

    // Get iterators to out_seq_data and out_indices
    string::iterator          i_out_seq = out_seq_data.begin();
    vector<TSeqPos>::iterator i_out_idx = out_indices->begin();

    // Index of current input seq base
    TSeqPos uIdx = uBeginIdx;

    // Loop through input sequence looking for ambiguities
    for (string::const_iterator i_in = i_in_begin; i_in != i_in_end; ++i_in) {
        if (m_DetectAmbigIupacnaNcbi2na->m_Table
            [static_cast<unsigned char>(*i_in)] == 1) {
            // Record the base itself and where it was found
            (*i_out_seq) = (*i_in);
            ++i_out_seq;
            (*i_out_idx) = uIdx;
            ++i_out_idx;
            ++uNumAmbigs;
        }
        ++uIdx;
    }

    // Shrink both outputs to the number of ambiguities actually found
    out_seq_data.resize(uNumAmbigs);
    out_indices->resize(uNumAmbigs);

    return uNumAmbigs;
}
// In-place "keep" for an ncbi2na sequence (4 residues per byte).
// Shifts the requested residue range down to the front of the byte vector
// and shrinks the vector to the bytes needed for that range.
//   in_seq    - sequence to modify; accessed through SetNcbi2na()
//   uBeginIdx - index (in residues) of the first residue to keep
//   uLength   - number of residues to keep; 0 or a value reaching past the
//               end means "through the end of the sequence"
// Returns the number of residues kept; 0 (with the data cleared) when
// uBeginIdx is at or beyond the end.
TSeqPos CSeqportUtil_implementation::KeepNcbi2na
(CSeq_data* in_seq,
 TSeqPos    uBeginIdx,
 TSeqPos    uLength)
    const
{
    vector<char>& packed = in_seq->SetNcbi2na().Set();

    // Starting past the end leaves nothing to keep
    if (uBeginIdx >= packed.size()*4) {
        packed.clear();
        return 0;
    }

    // Zero or over-long requests both collapse to "everything that is left"
    if ((uLength == 0) || (uLength > (4*packed.size() - uBeginIdx))) {
        uLength = 4*packed.size() - uBeginIdx;
    }

    // The whole sequence was asked for: there is nothing to move
    if ((uBeginIdx == 0) && (uLength >= 4*packed.size())) {
        return uLength;
    }

    // Bytes holding the first and the last residue to keep
    const TSeqPos uFirstByte = uBeginIdx/4;
    const TSeqPos uLastByte  = (uBeginIdx + uLength - 1)/4;

    // Bit offset of the first kept residue inside its byte
    const TSeqPos uBitOffset = 2 * (uBeginIdx % 4);

    // rightMask has its low uBitOffset bits clear; leftMask is its complement
    const unsigned char rightMask = 0xff << uBitOffset;
    const unsigned char leftMask  = ~rightMask;

    vector<char>::iterator       dst     = packed.begin();
    vector<char>::iterator       src     = packed.begin() + uFirstByte;
    const vector<char>::iterator srcLast = packed.begin() + uLastByte;

    // Build each output byte from the current source byte shifted left and
    // the next source byte shifted right; the masks select which operand
    // supplies which bits.
    while (src != srcLast) {
        (*dst) = (((*src) << uBitOffset) | leftMask) &
                 (((*(src + 1)) >> (8 - uBitOffset)) | rightMask);
        ++dst;
        ++src;
    }

    // The final source byte has no successor to borrow from
    (*dst) = (*src) << uBitOffset;

    // Drop the bytes that are no longer needed
    const TSeqPos uBytes = uLength/4 + (((uLength % 4) != 0) ? 1 : 0);
    packed.resize(uBytes);

    return uLength;
}
// In-place "keep" for an ncbi4na sequence (2 residues per byte).
// Shifts the requested residue range down to the front of the byte vector
// and shrinks the vector to the bytes needed for that range.
//   in_seq    - sequence to modify; accessed through SetNcbi4na()
//   uBeginIdx - index (in residues) of the first residue to keep
//   uLength   - number of residues to keep; 0 or a value reaching past the
//               end means "through the end of the sequence"
// Returns the number of residues kept; 0 (with the data cleared) when
// uBeginIdx is at or beyond the end.
TSeqPos CSeqportUtil_implementation::KeepNcbi4na
(CSeq_data* in_seq,
 TSeqPos    uBeginIdx,
 TSeqPos    uLength)
    const
{
    vector<char>& packed = in_seq->SetNcbi4na().Set();

    // Starting past the end leaves nothing to keep
    if (uBeginIdx >= packed.size()*2) {
        packed.clear();
        return 0;
    }

    // Zero or over-long requests both collapse to "everything that is left"
    if ((uLength == 0) || (uLength > (2*packed.size() - uBeginIdx))) {
        uLength = 2*packed.size() - uBeginIdx;
    }

    // The whole sequence was asked for: there is nothing to move
    if ((uBeginIdx == 0) && (uLength >= 2*packed.size())) {
        return uLength;
    }

    // Bytes holding the first and the last residue to keep
    const TSeqPos uFirstByte = uBeginIdx/2;
    const TSeqPos uLastByte  = (uBeginIdx + uLength - 1)/2;

    // Bit offset of the first kept residue inside its byte
    const unsigned int uBitOffset = 4 * (uBeginIdx % 2);

    // rightMask has its low uBitOffset bits clear; leftMask is its complement
    const unsigned char rightMask = 0xff << uBitOffset;
    const unsigned char leftMask  = ~rightMask;

    vector<char>::iterator       dst     = packed.begin();
    vector<char>::iterator       src     = packed.begin() + uFirstByte;
    const vector<char>::iterator srcLast = packed.begin() + uLastByte;

    // Build each output byte from the current source byte shifted left and
    // the next source byte shifted right; the masks select which operand
    // supplies which bits.
    while (src != srcLast) {
        (*dst) = (((*src) << uBitOffset) | leftMask) &
                 (((*(src + 1)) >> (8 - uBitOffset)) | rightMask);
        ++dst;
        ++src;
    }

    // The final source byte has no successor to borrow from
    (*dst) = (*src) << uBitOffset;

    // Drop the bytes that are no longer needed
    const TSeqPos uBytes = uLength/2 + (((uLength % 2) != 0) ? 1 : 0);
    packed.resize(uBytes);

    return uLength;
}
// Method to implement Keep for Iupacna.
// Reduces in_seq, in place, to the requested residue range.
//   in_seq    - sequence to modify; accessed through SetIupacna()
//   uBeginIdx - index of the first residue to keep
//   uLength   - number of residues to keep; 0 or a value reaching past the
//               end means "through the end of the sequence"
// Returns the length of the kept sequence; 0 (with the data emptied) when
// uBeginIdx is at or beyond the end.
TSeqPos CSeqportUtil_implementation::KeepIupacna
(CSeq_data* in_seq,
 TSeqPos    uBeginIdx,
 TSeqPos    uLength)
    const
{
    // Get a reference to in_seq
    string& in_seq_data = in_seq->SetIupacna().Set();

    // If uBeginIdx past end of in_seq, return empty in_seq
    if (uBeginIdx >= in_seq_data.size()) {
        in_seq_data.erase();
        return 0;
    }

    // If uLength is 0, or goes beyond the end of in_seq, keep from uBeginIdx
    // to the end.  The comparison is made against the residues remaining so
    // that a very large uLength cannot wrap around in uBeginIdx + uLength
    // and slip past the clamp.
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // If uBeginIdx == 0 and uLength == in_seq_data.size()
    // just return as the entire sequence is being requested
    if ((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
        return uLength;

    // Drop everything before uBeginIdx, then everything after the range
    in_seq_data.erase(0, uBeginIdx);
    in_seq_data.resize(uLength);

    return uLength;
}
// Method to implement Keep for Ncbieaa.
// Reduces in_seq, in place, to the requested residue range.
//   in_seq    - sequence to modify; accessed through SetNcbieaa()
//   uBeginIdx - index of the first residue to keep
//   uLength   - number of residues to keep; 0 or a value reaching past the
//               end means "through the end of the sequence"
// Returns the length of the kept sequence; 0 (with the data emptied) when
// uBeginIdx is at or beyond the end.
TSeqPos CSeqportUtil_implementation::KeepNcbieaa
(CSeq_data* in_seq,
 TSeqPos    uBeginIdx,
 TSeqPos    uLength)
    const
{
    // Get a reference to in_seq
    string& in_seq_data = in_seq->SetNcbieaa().Set();

    // If uBeginIdx past end of in_seq, return empty in_seq
    if (uBeginIdx >= in_seq_data.size()) {
        in_seq_data.erase();
        return 0;
    }

    // If uLength is 0, or goes beyond the end of in_seq, keep from uBeginIdx
    // to the end.  The comparison is made against the residues remaining so
    // that a very large uLength cannot wrap around in uBeginIdx + uLength
    // and slip past the clamp.
    const string::size_type uAvailable = in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // If uBeginIdx == 0 and uLength == in_seq_data.size()
    // just return as the entire sequence is being requested
    if ((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
        return uLength;

    // Drop everything before uBeginIdx, then everything after the range
    in_seq_data.erase(0, uBeginIdx);
    in_seq_data.resize(uLength);

    return uLength;
}
// Method to implement Keep for Ncbistdaa.
// Reduces in_seq, in place, to the requested residue range.
//   in_seq    - sequence to modify; accessed through SetNcbistdaa()
//   uBeginIdx - index of the first residue to keep
//   uLength   - number of residues to keep; 0 or a value reaching past the
//               end means "through the end of the sequence"
// Returns the length of the kept sequence; 0 (with the data cleared) when
// uBeginIdx is at or beyond the end.
TSeqPos CSeqportUtil_implementation::KeepNcbistdaa
(CSeq_data* in_seq,
 TSeqPos    uBeginIdx,
 TSeqPos    uLength)
    const
{
    // Get a reference to in_seq
    vector<char>& in_seq_data = in_seq->SetNcbistdaa().Set();

    // If uBeginIdx past end of in_seq, return empty in_seq
    if (uBeginIdx >= in_seq_data.size()) {
        in_seq_data.clear();
        return 0;
    }

    // If uLength is 0, or goes beyond the end of in_seq, keep from uBeginIdx
    // to the end.  The comparison is made against the residues remaining so
    // that a very large uLength cannot wrap around in uBeginIdx + uLength
    // and slip past the clamp.
    const vector<char>::size_type uAvailable =
        in_seq_data.size() - uBeginIdx;
    if ((uLength == 0) || (uLength > uAvailable))
        uLength = static_cast<TSeqPos>(uAvailable);

    // If uBeginIdx == 0 and uLength == in_seq_data.size()
    // just return as the entire sequence is being requested
    if ((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
        return uLength;

    // Drop everything before uBeginIdx, then everything after the range
    in_seq_data.erase(in_seq_data.begin(), in_seq_data.begin() + uBeginIdx);
    in_seq_data.resize(uLength);

    return uLength;
}
// Method to implement Keep for Iupacaa.
//
// Trims in_seq in place so that only the residues in
// [uBeginIdx, uBeginIdx + uLength) remain. The data is handled as one
// residue per character.
//   in_seq    - sequence whose Iupacaa string is trimmed
//   uBeginIdx - index of the first residue to keep
//   uLength   - number of residues to keep; 0 means "through the end",
//               and a length running past the end is clamped
// Returns the number of residues kept. If uBeginIdx is at or past the end,
// the string is emptied and 0 is returned.
TSeqPos CSeqportUtil_implementation::KeepIupacaa
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Get a reference to in_seq
    string& in_seq_data = in_seq->SetIupacaa().Set();

    // If uBeginIdx past end of in_seq, return empty in_seq
    if (uBeginIdx >= in_seq_data.size()) {
        in_seq_data.erase();
        return 0;
    }

    // uBeginIdx < size() here, so this subtraction cannot wrap.
    const TSeqPos uAvailable =
        static_cast<TSeqPos>(in_seq_data.size() - uBeginIdx);

    // Clamp against what is available instead of testing
    // uBeginIdx + uLength: that sum can wrap around for a very large
    // uLength, which would let an out-of-range length slip through.
    if (uLength == 0  ||  uLength > uAvailable)
        uLength = uAvailable;

    // Drop the tail first so that removing the head only has to move the
    // residues that are actually kept. Both calls are no-ops when the whole
    // sequence was requested.
    in_seq_data.erase(static_cast<string::size_type>(uBeginIdx) + uLength);
    in_seq_data.erase(0, uBeginIdx);

    return uLength;
}
// Methods to complement na sequences
// In place methods
// In-place complement of an Iupacna sequence.
// The sequence is first trimmed with KeepIupacna(in_seq, uBeginIdx, uLength);
// every remaining character is then replaced by its entry in the
// m_Iupacna_complement table. Returns whatever KeepIupacna returned.
TSeqPos CSeqportUtil_implementation::ComplementIupacna
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Trim down to the requested range before touching any residue
    const TSeqPos nKept = KeepIupacna(in_seq, uBeginIdx, uLength);

    string& residues = in_seq->SetIupacna().Set();

    // The end iterator is fetched once, outside the loop condition
    const string::iterator stop = residues.end();
    for (string::iterator cur = residues.begin();  cur != stop;  ++cur) {
        const unsigned char key = static_cast<unsigned char>(*cur);
        *cur = m_Iupacna_complement->m_Table[key];
    }

    return nKept;
}
// In-place complement of an Ncbi2na sequence.
// The sequence is first trimmed with KeepNcbi2na(in_seq, uBeginIdx, uLength);
// every remaining byte is then replaced by its entry in the
// m_Ncbi2naComplement table. Returns whatever KeepNcbi2na returned.
TSeqPos CSeqportUtil_implementation::ComplementNcbi2na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Trim down to the requested range before touching any byte
    const TSeqPos nKept = KeepNcbi2na(in_seq, uBeginIdx, uLength);

    vector<char>& packed = in_seq->SetNcbi2na().Set();

    // Table lookup is done a whole byte at a time
    const vector<char>::iterator stop = packed.end();
    for (vector<char>::iterator cur = packed.begin();  cur != stop;  ++cur) {
        const unsigned char key = static_cast<unsigned char>(*cur);
        *cur = m_Ncbi2naComplement->m_Table[key];
    }

    return nKept;
}
// In-place complement of an Ncbi4na sequence.
// The sequence is first trimmed with KeepNcbi4na(in_seq, uBeginIdx, uLength);
// every remaining byte is then replaced by its entry in the
// m_Ncbi4naComplement table. Returns whatever KeepNcbi4na returned.
TSeqPos CSeqportUtil_implementation::ComplementNcbi4na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Trim down to the requested range before touching any byte
    const TSeqPos nKept = KeepNcbi4na(in_seq, uBeginIdx, uLength);

    vector<char>& packed = in_seq->SetNcbi4na().Set();

    // Table lookup is done a whole byte at a time
    const vector<char>::iterator stop = packed.end();
    for (vector<char>::iterator cur = packed.begin();  cur != stop;  ++cur) {
        const unsigned char key = static_cast<unsigned char>(*cur);
        *cur = m_Ncbi4naComplement->m_Table[key];
    }

    return nKept;
}
// Complement in copy methods
// Copying complement of an Iupacna sequence.
// Copies the requested range of in_seq into out_seq with GetIupacnaCopy and
// then complements out_seq in place. Returns the value GetIupacnaCopy
// produced; the return value of the in-place complement is ignored.
TSeqPos CSeqportUtil_implementation::ComplementIupacna
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    const TSeqPos nCopied =
        GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);

    // out_seq already holds just the wanted range, so the in-place
    // complement is asked for begin 0 / length 0.
    const TSeqPos kBegin  = 0;
    const TSeqPos kLength = 0;
    ComplementIupacna(out_seq, kBegin, kLength);

    return nCopied;
}
// Copying complement of an Ncbi2na sequence.
// Copies the requested range of in_seq into out_seq with GetNcbi2naCopy and
// then complements out_seq in place. Returns the value GetNcbi2naCopy
// produced; the return value of the in-place complement is ignored.
TSeqPos CSeqportUtil_implementation::ComplementNcbi2na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    const TSeqPos nCopied =
        GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);

    // out_seq already holds just the wanted range, so the in-place
    // complement is asked for begin 0 / length 0.
    const TSeqPos kBegin  = 0;
    const TSeqPos kLength = 0;
    ComplementNcbi2na(out_seq, kBegin, kLength);

    return nCopied;
}
// Copying complement of an Ncbi4na sequence.
// Copies the requested range of in_seq into out_seq with GetNcbi4naCopy and
// then complements out_seq in place. Returns the value GetNcbi4naCopy
// produced; the return value of the in-place complement is ignored.
TSeqPos CSeqportUtil_implementation::ComplementNcbi4na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    const TSeqPos nCopied =
        GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);

    // out_seq already holds just the wanted range, so the in-place
    // complement is asked for begin 0 / length 0.
    const TSeqPos kBegin  = 0;
    const TSeqPos kLength = 0;
    ComplementNcbi4na(out_seq, kBegin, kLength);

    return nCopied;
}
// Methods to reverse na sequences
// In place methods
// In-place reversal of an Iupacna sequence.
// Trims in_seq with KeepIupacna(in_seq, uBeginIdx, uLength) and then reverses
// the order of the remaining characters. Returns whatever KeepIupacna
// returned.
TSeqPos CSeqportUtil_implementation::ReverseIupacna
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Only the requested range survives the trim
    const TSeqPos nKept = KeepIupacna(in_seq, uBeginIdx, uLength);

    // One character per residue, so a plain string reversal is enough
    string& residues = in_seq->SetIupacna().Set();
    reverse(residues.begin(), residues.end());

    return nKept;
}
// In-place reversal of an Ncbi2na sequence (handled here as 4 residues per
// byte).
//   in_seq    - sequence whose Ncbi2na data is reversed and trimmed
//   uBeginIdx - residue index where the range to reverse starts
//   uLength   - residue count; 0, or a count running past the end, means
//               "through the end"
// If uBeginIdx is at or past the end, the data is emptied and 0 is returned.
// Otherwise the result of the final KeepNcbi2na call is returned.
TSeqPos CSeqportUtil_implementation::ReverseNcbi2na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    vector<char>& packed = in_seq->SetNcbi2na().Set();

    // Capacity of the data expressed in residues
    const vector<char>::size_type nResidues = 4 * packed.size();

    // Nothing to reverse when the start lies outside the data
    if (uBeginIdx >= nResidues) {
        packed.clear();
        return 0;
    }

    // Zero or over-long lengths both collapse to "up to the end"
    if (uLength == 0  ||  (uBeginIdx + uLength) > nResidues) {
        uLength = static_cast<TSeqPos>(nResidues - uBeginIdx);
    }

    // Byte span [uFirstByte, uPastByte) that covers the residue range
    const TSeqPos uFirstByte = uBeginIdx / 4;
    const TSeqPos uPastByte  =
        uFirstByte + (uLength - 1 + (uBeginIdx % 4)) / 4 + 1;

    const vector<char>::iterator span_begin = packed.begin() + uFirstByte;
    const vector<char>::iterator span_end   = packed.begin() + uPastByte;

    // Step 1: reverse the residues inside each byte via the lookup table
    for (vector<char>::iterator cur = span_begin;  cur != span_end;  ++cur) {
        const unsigned char key = static_cast<unsigned char>(*cur);
        *cur = m_Ncbi2naRev->m_Table[key];
    }

    // Step 2: reverse the order of the bytes themselves
    reverse(span_begin, span_end);

    // Step 3: the wanted residues may now start part-way into the first
    // byte of the span; compute that offset and let KeepNcbi2na trim.
    const TSeqPos uOffset =
        3 - ((uBeginIdx + uLength - 1) % 4) + 4 * uFirstByte;
    return KeepNcbi2na(in_seq, uOffset, uLength);
}
// In-place reversal of an Ncbi4na sequence (handled here as 2 residues per
// byte).
//   in_seq    - sequence whose Ncbi4na data is reversed and trimmed
//   uBeginIdx - residue index where the range to reverse starts
//   uLength   - residue count; 0, or a count running past the end, means
//               "through the end"
// If uBeginIdx is at or past the end, the data is emptied and 0 is returned.
// Otherwise the result of the final KeepNcbi4na call is returned.
TSeqPos CSeqportUtil_implementation::ReverseNcbi4na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    vector<char>& packed = in_seq->SetNcbi4na().Set();

    // Capacity of the data expressed in residues
    const vector<char>::size_type nResidues = 2 * packed.size();

    // Nothing to reverse when the start lies outside the data
    if (uBeginIdx >= nResidues) {
        packed.clear();
        return 0;
    }

    // Zero or over-long lengths both collapse to "up to the end"
    if (uLength == 0  ||  (uBeginIdx + uLength) > nResidues) {
        uLength = static_cast<TSeqPos>(nResidues - uBeginIdx);
    }

    // Byte span [uFirstByte, uPastByte) that covers the residue range
    const TSeqPos uFirstByte = uBeginIdx / 2;
    const TSeqPos uPastByte  =
        uFirstByte + (uLength - 1 + (uBeginIdx % 2)) / 2 + 1;

    const vector<char>::iterator span_begin = packed.begin() + uFirstByte;
    const vector<char>::iterator span_end   = packed.begin() + uPastByte;

    // Step 1: reverse the residues inside each byte via the lookup table
    for (vector<char>::iterator cur = span_begin;  cur != span_end;  ++cur) {
        const unsigned char key = static_cast<unsigned char>(*cur);
        *cur = m_Ncbi4naRev->m_Table[key];
    }

    // Step 2: reverse the order of the bytes themselves
    reverse(span_begin, span_end);

    // Step 3: the wanted residues may now start part-way into the first
    // byte of the span; compute that offset and let KeepNcbi4na trim.
    const TSeqPos uOffset =
        1 - ((uBeginIdx + uLength - 1) % 2) + 2 * uFirstByte;
    return KeepNcbi4na(in_seq, uOffset, uLength);
}
// Reverse in copy methods
// Copying reversal of an Iupacna sequence.
// Copies the requested range of in_seq into out_seq with GetIupacnaCopy
// (its return value is not used) and then reverses out_seq in place from
// index 0 for uLength residues. Returns the in-place reversal's result.
TSeqPos CSeqportUtil_implementation::ReverseIupacna
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);

    // The copy starts at the requested residue, so reverse from index 0
    const TSeqPos kCopyStart = 0;
    const TSeqPos uCount     = uLength;
    return ReverseIupacna(out_seq, kCopyStart, uCount);
}
// Copying reversal of an Ncbi2na sequence.
// Copies the requested range of in_seq into out_seq with GetNcbi2naCopy and
// then reverses out_seq in place. Returns the in-place reversal's result.
//
// The residue count handed to the in-place reversal is the one reported by
// GetNcbi2naCopy, not the caller's uLength. Ncbi2na is handled as 4 residues
// per byte, so when uLength is 0 or runs past the end the copy may end in a
// partially filled byte; passing uLength on unchanged would make the
// in-place reversal count those padding positions as residues.
// NOTE(review): this relies on GetNcbi2naCopy returning the number of
// residues it copied, which is how its other callers in this file use the
// value -- confirm against its definition.
TSeqPos CSeqportUtil_implementation::ReverseNcbi2na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    const TSeqPos uCopied =
        GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);

    // The copy starts at the requested residue, so reverse from index 0
    const TSeqPos uIdx1 = 0;
    return ReverseNcbi2na(out_seq, uIdx1, uCopied);
}
// Copying reversal of an Ncbi4na sequence.
// Copies the requested range of in_seq into out_seq with GetNcbi4naCopy and
// then reverses out_seq in place. Returns the in-place reversal's result.
//
// The residue count handed to the in-place reversal is the one reported by
// GetNcbi4naCopy, not the caller's uLength. Ncbi4na is handled as 2 residues
// per byte, so when uLength is 0 or runs past the end the copy may end in a
// half-filled byte; passing uLength on unchanged would make the in-place
// reversal count that padding position as a residue.
// NOTE(review): this relies on GetNcbi4naCopy returning the number of
// residues it copied, which is how its other callers in this file use the
// value -- confirm against its definition.
TSeqPos CSeqportUtil_implementation::ReverseNcbi4na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    const TSeqPos uCopied =
        GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);

    // The copy starts at the requested residue, so reverse from index 0
    const TSeqPos uIdx1 = 0;
    return ReverseNcbi4na(out_seq, uIdx1, uCopied);
}
// Methods to reverse-complement na sequences
// In place methods
// In-place reverse-complement of an Iupacna sequence.
// Runs the in-place ReverseIupacna on the requested range (result ignored),
// then the in-place ComplementIupacna from index 0 with the same uLength,
// and returns the complement step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementIupacna
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Step 1: reverse (this also trims in_seq to the requested range)
    ReverseIupacna(in_seq, uBeginIdx, uLength);

    // Step 2: complement what is left, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementIupacna(in_seq, kFirst, uLength);
}
// In-place reverse-complement of an Ncbi2na sequence.
// Runs the in-place ReverseNcbi2na on the requested range (result ignored),
// then the in-place ComplementNcbi2na from index 0 with the same uLength,
// and returns the complement step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi2na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Step 1: reverse (this also trims in_seq to the requested range)
    ReverseNcbi2na(in_seq, uBeginIdx, uLength);

    // Step 2: complement what is left, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementNcbi2na(in_seq, kFirst, uLength);
}
// In-place reverse-complement of an Ncbi4na sequence.
// Runs the in-place ReverseNcbi4na on the requested range (result ignored),
// then the in-place ComplementNcbi4na from index 0 with the same uLength,
// and returns the complement step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi4na
(CSeq_data*  in_seq,
 TSeqPos     uBeginIdx,
 TSeqPos     uLength)
    const
{
    // Step 1: reverse (this also trims in_seq to the requested range)
    ReverseNcbi4na(in_seq, uBeginIdx, uLength);

    // Step 2: complement what is left, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementNcbi4na(in_seq, kFirst, uLength);
}
// Reverse-complement in copy methods
// Copying reverse-complement of an Iupacna sequence.
// Uses the copying ReverseIupacna to place the reversed range in out_seq
// (result ignored), then complements out_seq in place from index 0 with the
// same uLength and returns that step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementIupacna
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    // Step 1: reversed copy of the requested range goes into out_seq
    ReverseIupacna(in_seq, out_seq, uBeginIdx, uLength);

    // Step 2: complement the copy, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementIupacna(out_seq, kFirst, uLength);
}
// Copying reverse-complement of an Ncbi2na sequence.
// Uses the copying ReverseNcbi2na to place the reversed range in out_seq
// (result ignored), then complements out_seq in place from index 0 with the
// same uLength and returns that step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi2na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    // Step 1: reversed copy of the requested range goes into out_seq
    ReverseNcbi2na(in_seq, out_seq, uBeginIdx, uLength);

    // Step 2: complement the copy, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementNcbi2na(out_seq, kFirst, uLength);
}
// Copying reverse-complement of an Ncbi4na sequence.
// Uses the copying ReverseNcbi4na to place the reversed range in out_seq
// (result ignored), then complements out_seq in place from index 0 with the
// same uLength and returns that step's result.
TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi4na
(const CSeq_data&  in_seq,
 CSeq_data*        out_seq,
 TSeqPos           uBeginIdx,
 TSeqPos           uLength)
    const
{
    // Step 1: reversed copy of the requested range goes into out_seq
    ReverseNcbi4na(in_seq, out_seq, uBeginIdx, uLength);

    // Step 2: complement the copy, starting from its first residue
    const TSeqPos kFirst = 0;
    return ComplementNcbi4na(out_seq, kFirst, uLength);
}
// Append methods
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Iupacna) into
// out_seq.
//   out_seq    - reset, then filled with slice 1 followed by slice 2
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// A slice whose start index is at or past the end of its sequence
// contributes nothing. Returns the total number of residues written.
TSeqPos CSeqportUtil_implementation::AppendIupacna
(CSeq_data*        out_seq,
 const CSeq_data&  in_seq1,
 TSeqPos           uBeginIdx1,
 TSeqPos           uLength1,
 const CSeq_data&  in_seq2,
 TSeqPos           uBeginIdx2,
 TSeqPos           uLength2)
    const
{
    // Get references to in_seqs
    const string& in_seq1_data = in_seq1.GetIupacna().Get();
    const string& in_seq2_data = in_seq2.GetIupacna().Get();

    // Get a reference to out_seq
    out_seq->Reset();
    string& out_seq_data = out_seq->SetIupacna().Set();

    // Clamp each slice on its own. Previously only the "both starts out of
    // range" case was rejected; with a single out-of-range start the length
    // computation (size - uBeginIdx) wrapped around and substr() threw
    // out_of_range.
    if (uBeginIdx1 >= in_seq1_data.size()) {
        uLength1 = 0;
    } else {
        const TSeqPos uAvail1 =
            static_cast<TSeqPos>(in_seq1_data.size() - uBeginIdx1);
        if (uLength1 == 0  ||  uLength1 > uAvail1)
            uLength1 = uAvail1;
    }
    if (uBeginIdx2 >= in_seq2_data.size()) {
        uLength2 = 0;
    } else {
        const TSeqPos uAvail2 =
            static_cast<TSeqPos>(in_seq2_data.size() - uBeginIdx2);
        if (uLength2 == 0  ||  uLength2 > uAvail2)
            uLength2 = uAvail2;
    }

    // Append the slices; an empty slice is skipped so that an out-of-range
    // start index is never handed to append().
    if (uLength1 != 0)
        out_seq_data.append(in_seq1_data, uBeginIdx1, uLength1);
    if (uLength2 != 0)
        out_seq_data.append(in_seq2_data, uBeginIdx2, uLength2);

    return uLength1 + uLength2;
}
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Ncbi2na,
// handled here as 4 residues of 2 bits each per byte) into out_seq.
//   out_seq    - reset, then filled with the packed concatenation
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// Returns the total number of residues written. If exactly one start index
// is at or past the end of its sequence, the result is whatever
// GetNcbi2naCopy produces for the other slice; if both are, 0 is returned
// and no data is written to out_seq.
TSeqPos CSeqportUtil_implementation::AppendNcbi2na
(CSeq_data* out_seq,
const CSeq_data& in_seq1,
TSeqPos uBeginIdx1,
TSeqPos uLength1,
const CSeq_data& in_seq2,
TSeqPos uBeginIdx2,
TSeqPos uLength2)
const
{
// Get references to in_seqs
const vector<char>& in_seq1_data = in_seq1.GetNcbi2na().Get();
const vector<char>& in_seq2_data = in_seq2.GetNcbi2na().Get();
// Get a reference to out_seq
out_seq->Reset();
vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
// Handle case where both uBeginidx go beyond in_seq
if((uBeginIdx1 >= 4*in_seq1_data.size()) &&
(uBeginIdx2 >= 4*in_seq2_data.size()))
return 0;
// Handle case where uBeginIdx1 goes beyond end of in_seq1
if(uBeginIdx1 >= 4*in_seq1_data.size())
return GetNcbi2naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);
// Handle case where uBeginIdx2 goes beyond end of in_seq2
if(uBeginIdx2 >= 4*in_seq2_data.size())
return GetNcbi2naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);
// Validate and Adjust uBeginIdx_ and uLength_
// (a zero or over-long length becomes "from uBeginIdx to the end")
if(((uBeginIdx1 + uLength1) > 4*in_seq1_data.size()) || uLength1 == 0)
uLength1 = 4*in_seq1_data.size() - uBeginIdx1;
if(((uBeginIdx2 + uLength2) > 4*in_seq2_data.size()) || uLength2 == 0)
uLength2 = 4*in_seq2_data.size() - uBeginIdx2;
// Resize out_seq_data to hold appended sequence
// (one byte per 4 residues, rounded up)
TSeqPos uTotalLength = uLength1 + uLength2;
if((uTotalLength % 4) == 0)
out_seq_data.resize(uTotalLength/4);
else
out_seq_data.resize(uTotalLength/4 + 1);
// Calculate bit shifts required for in_seq1
// lShift1 moves the first wanted residue of a byte up to the top of the
// byte; rShift1 is the complementary shift applied to the following byte.
unsigned int lShift1 = 2*(uBeginIdx1 % 4);
unsigned int rShift1 = 8 - lShift1;
// Calculate bit shifts required for in_seq2
// uVacantIdx: bit offset, within the last output byte used by in_seq1,
//             at which in_seq2 has to continue (0 = that byte is full).
// uStartIdx:  bit offset of the first wanted residue inside its in_seq2 byte.
// uCase records how the two offsets compare; it selects the junction
// handling in the switch further down.
unsigned int lShift2, rShift2, uCase;
unsigned int uVacantIdx = 2*(uLength1 % 4);
unsigned int uStartIdx = 2*(uBeginIdx2 % 4);
if((uVacantIdx < uStartIdx) && (uVacantIdx > 0))
{
uCase = 0;
lShift2 = uStartIdx - uVacantIdx;
rShift2 = 8 - lShift2;
}
else if((uVacantIdx < uStartIdx) && (uVacantIdx == 0))
{
uCase = 1;
lShift2 = uStartIdx;
rShift2 = 8 - lShift2;
}
else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
{
uCase = 2;
lShift2 = 0;
rShift2 = 8;
}
else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
{
uCase = 3;
lShift2 = 0;
rShift2 = 8;
}
else
{
uCase = 4;
rShift2 = uVacantIdx - uStartIdx;
lShift2 = 8 - rShift2;
}
// Determine begin and end points for iterators.
// uStartN is the first input byte touched; uEndN is one past the last
// input byte touched (residue end index divided by 4, rounded up).
TSeqPos uStart1 = uBeginIdx1/4;
TSeqPos uEnd1;
if(((uBeginIdx1 + uLength1) % 4) == 0)
uEnd1 = (uBeginIdx1 + uLength1)/4;
else
uEnd1 = (uBeginIdx1 + uLength1)/4 + 1;
TSeqPos uStart2 = uBeginIdx2/4;
TSeqPos uEnd2;
if(((uBeginIdx2 + uLength2) % 4) == 0)
uEnd2 = (uBeginIdx2 + uLength2)/4;
else
uEnd2 = (uBeginIdx2 + uLength2)/4 + 1;
// Get begin and end positions on in_seqs
// i_in1_end stops on the last touched byte of in_seq1 (note the "- 1"),
// because the copy loop below also reads *(i_in1+1).
vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;
// Declare iterators
// i_out starts one position before the first output byte because the
// writes below pre-increment it before storing.
// NOTE(review): forming begin() - 1 is formally undefined behavior for
// standard iterators, even though it is never dereferenced in that state.
vector<char>::iterator i_out = out_seq_data.begin() - 1;
vector<char>::const_iterator i_in1;
vector<char>::const_iterator i_in2;
// Insert in_seq1 into out_seq
// Each output byte is the current input byte shifted up, completed with
// the top bits of the next input byte.
for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
(*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);
// Handle last byte for in_seq1 if necessary
// uEndOutByte is the index of the last output byte that receives in_seq1
// data; if the loop has not reached it yet, emit the final input byte.
TSeqPos uEndOutByte;
if((uLength1 % 4) == 0)
uEndOutByte = uLength1/4 - 1;
else
uEndOutByte = uLength1/4;
if(i_out != (out_seq_data.begin() + uEndOutByte))
(*(++i_out)) = (*i_in1) << lShift1;
// Connect in_seq1 and in_seq2
// uMask1 keeps the high 2*(uLength1 % 4) bits of the junction byte, i.e.
// the part already filled from in_seq1 (it is 0 when uLength1 % 4 == 0).
// uMask2 keeps the low bits of the first in_seq2 byte, starting at the
// first wanted residue.
// uSeq2Inc is the byte offset at which the bulk copy loop for in_seq2
// starts; only the two-byte branch of case 4 resets it to 0.
unsigned char uMask1 = 255 << (8 - 2*(uLength1 % 4));
unsigned char uMask2 = 255 >> (2*(uBeginIdx2 % 4));
TSeqPos uSeq2Inc = 1;
// In every case, the branch taken when (i_in2_begin + 1) == i_in2_end
// handles a slice of in_seq2 that touches a single input byte and returns
// immediately.
switch (uCase) {
case 0: // 0 < uVacantIdx < uStartIdx
if((i_in2_begin + 1) == i_in2_end)
{
(*i_out) &= uMask1;
(*i_out) |= ((*i_in2_begin) & uMask2) << lShift2;
return uTotalLength;
}
else
{
(*i_out) &= uMask1;
(*i_out) |=
(((*i_in2_begin) & uMask2) << lShift2) |
(((*(i_in2_begin+1)) & 255) >> rShift2);
}
break;
case 1: // 0 == uVacantIdx < uStartIdx
if((i_in2_begin + 1) == i_in2_end)
{
(*(++i_out)) = (*i_in2_begin) << lShift2;
return uTotalLength;
}
else
{
(*(++i_out)) =
((*i_in2_begin) << lShift2) |
(((*(i_in2_begin+1)) & 255) >> rShift2);
}
break;
case 2: // uVacantIdx == uStartIdx > 0
(*i_out) &= uMask1;
(*i_out) |= (*i_in2_begin) & uMask2;
if((i_in2_begin + 1) == i_in2_end)
return uTotalLength;
break;
case 3: // uVacantIdx == uStartIdx == 0
(*(++i_out)) = (*i_in2_begin);
if((i_in2_begin + 1) == i_in2_end)
return uTotalLength;
break;
case 4: // uVacantIdx > uStartIdx
if((i_in2_begin + 1) == i_in2_end)
{
(*i_out) &= uMask1;
(*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
if(++i_out != out_seq_data.end())
(*i_out) = (*i_in2_begin) << lShift2;
return uTotalLength;
}
else
{
(*i_out) &= uMask1;
// NOTE(review): the second term shifts bits of the next byte left by
// lShift2; its intent is not obvious from this code -- verify.
(*i_out) |=
(((*i_in2_begin) & uMask2) >> rShift2) |
((*(i_in2_begin+1) & ~uMask2) << lShift2);
uSeq2Inc = 0;
}
}
// Insert in_seq2 into out_seq
// The loop stops one byte early (second condition) because it reads
// *(i_in2+1); the remaining byte, if any, is handled afterwards.
for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
((i_in2+1) != i_in2_end); ++i_in2) {
(*(++i_out)) = ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
}
// Handle last byte for in_seq2, if there is one
if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
(*i_out) = (*i_in2) << lShift2;
return uLength1 + uLength2;
}
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Ncbi4na,
// handled here as 2 residues of 4 bits each per byte) into out_seq.
//   out_seq    - reset, then filled with the packed concatenation
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// Returns the total number of residues written. If exactly one start index
// is at or past the end of its sequence, the result is whatever
// GetNcbi4naCopy produces for the other slice; if both are, 0 is returned
// and no data is written to out_seq.
TSeqPos CSeqportUtil_implementation::AppendNcbi4na
(CSeq_data*        out_seq,
 const CSeq_data&  in_seq1,
 TSeqPos           uBeginIdx1,
 TSeqPos           uLength1,
 const CSeq_data&  in_seq2,
 TSeqPos           uBeginIdx2,
 TSeqPos           uLength2)
    const
{
    // Get references to in_seqs
    const vector<char>& in_seq1_data = in_seq1.GetNcbi4na().Get();
    const vector<char>& in_seq2_data = in_seq2.GetNcbi4na().Get();

    // Get a reference to out_seq
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();

    // The residue count of an input is 2 * size(). These three checks used
    // to compare against 4 * size(), so a start index in
    // [2*size, 4*size) slipped through and made the length computation
    // below (2*size - uBeginIdx) wrap around.

    // Handle both uBeginidx go beyond end of in_seq
    if((uBeginIdx1 >= 2*in_seq1_data.size()) &&
       (uBeginIdx2 >= 2*in_seq2_data.size()))
        return 0;

    // Handle case where uBeginIdx1 goes beyond end of in_seq1
    if(uBeginIdx1 >= 2*in_seq1_data.size())
        return GetNcbi4naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);

    // Handle case where uBeginIdx2 goes beyond end of in_seq2
    if(uBeginIdx2 >= 2*in_seq2_data.size())
        return GetNcbi4naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);

    // Validate and Adjust uBeginIdx_ and uLength_
    // (a zero or over-long length becomes "from uBeginIdx to the end")
    if(((uBeginIdx1 + uLength1) > 2*in_seq1_data.size()) || uLength1 == 0)
        uLength1 = 2*in_seq1_data.size() - uBeginIdx1;

    if(((uBeginIdx2 + uLength2) > 2*in_seq2_data.size()) || uLength2 == 0)
        uLength2 = 2*in_seq2_data.size() - uBeginIdx2;

    // Resize out_seq_data to hold appended sequence
    // (one byte per 2 residues, rounded up)
    TSeqPos uTotalLength = uLength1 + uLength2;
    if((uTotalLength % 2) == 0)
        out_seq_data.resize(uTotalLength/2);
    else
        out_seq_data.resize(uTotalLength/2 + 1);

    // Calculate bit shifts required for in_seq1
    unsigned int lShift1 = 4*(uBeginIdx1 % 2);
    unsigned int rShift1 = 8 - lShift1;

    // Calculate bit shifts required for in_seq2
    // uVacantIdx: bit offset, within the last output byte used by in_seq1,
    //             at which in_seq2 has to continue (0 = that byte is full).
    // uStartIdx:  bit offset of the first wanted residue inside its
    //             in_seq2 byte.
    // Both are either 0 or 4; uCase records how they compare and selects
    // the junction handling in the switch further down.
    unsigned int lShift2, rShift2, uCase;
    unsigned int uVacantIdx = 4*(uLength1 % 2);
    unsigned int uStartIdx = 4*(uBeginIdx2 % 2);
    if((uVacantIdx < uStartIdx))
    {
        uCase = 1;
        lShift2 = uStartIdx;
        rShift2 = 8 - lShift2;
    }
    else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
    {
        uCase = 2;
        lShift2 = 0;
        rShift2 = 8;
    }
    else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
    {
        uCase = 3;
        lShift2 = 0;
        rShift2 = 8;
    }
    else
    {
        uCase = 4;
        rShift2 = uVacantIdx - uStartIdx;
        lShift2 = 8 - rShift2;
    }

    // Determine begin and end points for iterators.
    // uStartN is the first input byte touched; uEndN is one past the last
    // input byte touched (residue end index divided by 2, rounded up).
    TSeqPos uStart1 = uBeginIdx1/2;
    TSeqPos uEnd1;
    if(((uBeginIdx1 + uLength1) % 2) == 0)
        uEnd1 = (uBeginIdx1 + uLength1)/2;
    else
        uEnd1 = (uBeginIdx1 + uLength1)/2 + 1;

    TSeqPos uStart2 = uBeginIdx2/2;
    TSeqPos uEnd2;
    if(((uBeginIdx2 + uLength2) % 2) == 0)
        uEnd2 = (uBeginIdx2 + uLength2)/2;
    else
        uEnd2 = (uBeginIdx2 + uLength2)/2 + 1;

    // Get begin and end positions on in_seqs
    // i_in1_end stops on the last touched byte of in_seq1 (note the "- 1"),
    // because the copy loop below also reads *(i_in1+1).
    vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
    vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
    vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
    vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;

    // Declare iterators
    // i_out starts one position before the first output byte because the
    // writes below pre-increment it before storing.
    // NOTE(review): forming begin() - 1 is formally undefined behavior for
    // standard iterators, even though it is never dereferenced in that state.
    vector<char>::iterator i_out = out_seq_data.begin() - 1;
    vector<char>::const_iterator i_in1;
    vector<char>::const_iterator i_in2;

    // Insert in_seq1 into out_seq
    for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
        (*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);

    // Handle last byte for in_seq1 if necessary
    // uEndOutByte is the index of the last output byte that receives
    // in_seq1 data; if the loop has not reached it yet, emit the final
    // input byte.
    TSeqPos uEndOutByte;
    if((uLength1 % 2) == 0)
        uEndOutByte = uLength1/2 - 1;
    else
        uEndOutByte = uLength1/2;
    if(i_out != (out_seq_data.begin() + uEndOutByte))
        (*(++i_out)) = (*i_in1) << lShift1;

    // Connect in_seq1 and in_seq2
    // uMask1 keeps the high 4*(uLength1 % 2) bits of the junction byte, i.e.
    // the part already filled from in_seq1 (it is 0 when uLength1 is even).
    // uMask2 keeps the low bits of the first in_seq2 byte, starting at the
    // first wanted residue.
    // uSeq2Inc is the byte offset at which the bulk copy loop for in_seq2
    // starts; only the two-byte branch of case 4 resets it to 0.
    unsigned char uMask1 = 255 << (8 - 4*(uLength1 % 2));
    unsigned char uMask2 = 255 >> (4*(uBeginIdx2 % 2));
    TSeqPos uSeq2Inc = 1;

    // In every case, the branch taken when (i_in2_begin+1) == i_in2_end
    // handles a slice of in_seq2 that touches a single input byte and
    // returns immediately.
    switch (uCase) {
    case 1: // 0 == uVacantIdx < uStartIdx
        if((i_in2_begin+1) == i_in2_end)
        {
            (*(++i_out)) = (*i_in2_begin) << lShift2;
            return uTotalLength;
        }
        else
        {
            (*(++i_out)) =
                ((*i_in2_begin) << lShift2) |
                (((*(i_in2_begin+1)) & 255) >> rShift2);
        }
        break;
    case 2: // uVacantIdx == uStartIdx > 0
        (*i_out) &= uMask1;
        (*i_out) |= (*i_in2_begin) & uMask2;
        if((i_in2_begin+1) == i_in2_end)
            return uTotalLength;
        break;
    case 3: // uVacantIdx == uStartIdx == 0
        (*(++i_out)) = (*i_in2_begin);
        if((i_in2_begin+1) == i_in2_end)
            return uTotalLength;
        break;
    case 4: // uVacantIdx > uStartIdx
        if((i_in2_begin+1) == i_in2_end)
        {
            (*i_out) &= uMask1;
            (*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
            if(++i_out != out_seq_data.end())
                (*i_out) = (*i_in2_begin) << lShift2;
            return uTotalLength;
        }
        else
        {
            (*i_out) &= uMask1;
            // NOTE(review): the second term shifts bits of the next byte
            // left by lShift2; its intent is not obvious from this code --
            // verify.
            (*i_out) |=
                (((*i_in2_begin) & uMask2) >> rShift2) |
                ((*(i_in2_begin+1) & ~uMask2) << lShift2);
            uSeq2Inc = 0;
        }
    }

    // Insert in_seq2 into out_seq
    // The loop stops one byte early (second condition) because it reads
    // *(i_in2+1); the remaining byte, if any, is handled afterwards.
    for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
            ((i_in2+1) != i_in2_end); ++i_in2) {
        (*(++i_out)) =
            ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
    }

    // Handle last byte for in_seq2, if there is one
    if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
        (*i_out) = (*i_in2) << lShift2;

    return uTotalLength;
}
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Ncbieaa) into
// out_seq.
//   out_seq    - reset, then filled with slice 1 followed by slice 2
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// A slice whose start index is at or past the end of its sequence
// contributes nothing. Returns the total number of residues written.
TSeqPos CSeqportUtil_implementation::AppendNcbieaa
(CSeq_data*        out_seq,
 const CSeq_data&  in_seq1,
 TSeqPos           uBeginIdx1,
 TSeqPos           uLength1,
 const CSeq_data&  in_seq2,
 TSeqPos           uBeginIdx2,
 TSeqPos           uLength2)
    const
{
    // Get references to in_seqs
    const string& in_seq1_data = in_seq1.GetNcbieaa().Get();
    const string& in_seq2_data = in_seq2.GetNcbieaa().Get();

    // Get a reference to out_seq
    out_seq->Reset();
    string& out_seq_data = out_seq->SetNcbieaa().Set();

    // Clamp each slice on its own. Previously only the "both starts out of
    // range" case was rejected; with a single out-of-range start the length
    // computation (size - uBeginIdx) wrapped around and substr() threw
    // out_of_range.
    if (uBeginIdx1 >= in_seq1_data.size()) {
        uLength1 = 0;
    } else {
        const TSeqPos uAvail1 =
            static_cast<TSeqPos>(in_seq1_data.size() - uBeginIdx1);
        if (uLength1 == 0  ||  uLength1 > uAvail1)
            uLength1 = uAvail1;
    }
    if (uBeginIdx2 >= in_seq2_data.size()) {
        uLength2 = 0;
    } else {
        const TSeqPos uAvail2 =
            static_cast<TSeqPos>(in_seq2_data.size() - uBeginIdx2);
        if (uLength2 == 0  ||  uLength2 > uAvail2)
            uLength2 = uAvail2;
    }

    // Append the slices; an empty slice is skipped so that an out-of-range
    // start index is never handed to append().
    if (uLength1 != 0)
        out_seq_data.append(in_seq1_data, uBeginIdx1, uLength1);
    if (uLength2 != 0)
        out_seq_data.append(in_seq2_data, uBeginIdx2, uLength2);

    return uLength1 + uLength2;
}
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Ncbistdaa,
// handled as one residue per byte) into out_seq.
//   out_seq    - reset, then filled with slice 1 followed by slice 2
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// A slice whose start index is at or past the end of its sequence
// contributes nothing. Returns the total number of residues written.
TSeqPos CSeqportUtil_implementation::AppendNcbistdaa
(CSeq_data*        out_seq,
 const CSeq_data&  in_seq1,
 TSeqPos           uBeginIdx1,
 TSeqPos           uLength1,
 const CSeq_data&  in_seq2,
 TSeqPos           uBeginIdx2,
 TSeqPos           uLength2)
    const
{
    // Get references to in_seqs
    const vector<char>& in_seq1_data = in_seq1.GetNcbistdaa().Get();
    const vector<char>& in_seq2_data = in_seq2.GetNcbistdaa().Get();

    // Get a reference to out_seq
    out_seq->Reset();
    vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();

    // Clamp each slice on its own. Previously only the "both starts out of
    // range" case was rejected; with a single out-of-range start the length
    // computation (size - uBeginIdx) wrapped around and the iterators
    // built from it pointed outside the input.
    if (uBeginIdx1 >= in_seq1_data.size()) {
        uLength1 = 0;
    } else {
        const TSeqPos uAvail1 =
            static_cast<TSeqPos>(in_seq1_data.size() - uBeginIdx1);
        if (uLength1 == 0  ||  uLength1 > uAvail1)
            uLength1 = uAvail1;
    }
    if (uBeginIdx2 >= in_seq2_data.size()) {
        uLength2 = 0;
    } else {
        const TSeqPos uAvail2 =
            static_cast<TSeqPos>(in_seq2_data.size() - uBeginIdx2);
        if (uLength2 == 0  ||  uLength2 > uAvail2)
            uLength2 = uAvail2;
    }

    // Insert the slices into out_seq. Iterators are only formed for a
    // non-empty slice, whose start index is known to be in range.
    if (uLength1 != 0) {
        const vector<char>::const_iterator i_in1_begin =
            in_seq1_data.begin() + uBeginIdx1;
        out_seq_data.insert(out_seq_data.end(),
                            i_in1_begin, i_in1_begin + uLength1);
    }
    if (uLength2 != 0) {
        const vector<char>::const_iterator i_in2_begin =
            in_seq2_data.begin() + uBeginIdx2;
        out_seq_data.insert(out_seq_data.end(),
                            i_in2_begin, i_in2_begin + uLength2);
    }

    return uLength1 + uLength2;
}
// Concatenate a slice of in_seq1 and a slice of in_seq2 (both Iupacaa) into
// out_seq.
//   out_seq    - reset, then filled with slice 1 followed by slice 2
//   in_seq1    - first source; residues [uBeginIdx1, uBeginIdx1 + uLength1)
//   in_seq2    - second source; residues [uBeginIdx2, uBeginIdx2 + uLength2)
//   uLength1/2 - 0, or a length running past the end, means "through the end"
// A slice whose start index is at or past the end of its sequence
// contributes nothing. Returns the total number of residues written.
TSeqPos CSeqportUtil_implementation::AppendIupacaa
(CSeq_data*        out_seq,
 const CSeq_data&  in_seq1,
 TSeqPos           uBeginIdx1,
 TSeqPos           uLength1,
 const CSeq_data&  in_seq2,
 TSeqPos           uBeginIdx2,
 TSeqPos           uLength2)
    const
{
    // Get references to in_seqs
    const string& in_seq1_data = in_seq1.GetIupacaa().Get();
    const string& in_seq2_data = in_seq2.GetIupacaa().Get();

    // Get a reference to out_seq
    out_seq->Reset();
    string& out_seq_data = out_seq->SetIupacaa().Set();

    // Clamp each slice on its own. Previously only the "both starts out of
    // range" case was rejected; with a single out-of-range start the length
    // computation (size - uBeginIdx) wrapped around and substr() threw
    // out_of_range.
    if (uBeginIdx1 >= in_seq1_data.size()) {
        uLength1 = 0;
    } else {
        const TSeqPos uAvail1 =
            static_cast<TSeqPos>(in_seq1_data.size() - uBeginIdx1);
        if (uLength1 == 0  ||  uLength1 > uAvail1)
            uLength1 = uAvail1;
    }
    if (uBeginIdx2 >= in_seq2_data.size()) {
        uLength2 = 0;
    } else {
        const TSeqPos uAvail2 =
            static_cast<TSeqPos>(in_seq2_data.size() - uBeginIdx2);
        if (uLength2 == 0  ||  uLength2 > uAvail2)
            uLength2 = uAvail2;
    }

    // Append the slices; an empty slice is skipped so that an out-of-range
    // start index is never handed to append().
    if (uLength1 != 0)
        out_seq_data.append(in_seq1_data, uBeginIdx1, uLength1);
    if (uLength2 != 0)
        out_seq_data.append(in_seq2_data, uBeginIdx2, uLength2);

    return uLength1 + uLength2;
}
// Look up the Iupacaa3 code for an ncbistdaa index.
// Thin wrapper: forwards to GetCodeOrName with code type
// eSeq_code_type_iupacaa3 and get_code = true, returning its result.
const string& CSeqportUtil_implementation::GetIupacaa3
(TIndex ncbistdaa)
{
    // true selects the code rather than the name
    const bool want_code = true;
    return GetCodeOrName(eSeq_code_type_iupacaa3, ncbistdaa, want_code);
}
// Report whether a code type, given as a CSeq_data::E_Choice, is available.
// e_not_set is never available; any other choice is converted with
// EChoiceToESeq and checked by the ESeq_code_type overload.
bool CSeqportUtil_implementation::IsCodeAvailable
(CSeq_data::E_Choice code_type)
{
    // Short-circuit keeps e_not_set away from EChoiceToESeq
    return code_type != CSeq_data::e_not_set
        && IsCodeAvailable(EChoiceToESeq(code_type));
}
// Report whether m_SeqCodeSet holds a code table for code_type.
// Returns true as soon as a table whose GetCode() equals code_type is found,
// false if none matches.
bool CSeqportUtil_implementation::IsCodeAvailable (ESeq_code_type code_type)
{
    typedef list<CRef<CSeq_code_table> > Ttables;

    bool available = false;

    // Scan the code tables, stopping at the first match
    ITERATE (Ttables, i_ct, m_SeqCodeSet->GetCodes()) {
        if ((*i_ct)->GetCode() == code_type) {
            available = true;
            break;
        }
    }

    return available;
}
// Return the (first index, last index) pair for a code type given as a
// CSeq_data::E_Choice. Converts the choice with EChoiceToESeq and defers to
// the ESeq_code_type overload.
CSeqportUtil::TPair CSeqportUtil_implementation::GetCodeIndexFromTo
(CSeq_data::E_Choice code_type)
{
    const ESeq_code_type seq_code = EChoiceToESeq(code_type);
    return GetCodeIndexFromTo(seq_code);
}
// Return the (first index, last index) pair for code_type.
// The first index is the matching table's start-at value when that is set
// and 0 otherwise; the last index is first + GetNum() - 1.
// Throws CSeqportUtil::CBadType if m_SeqCodeSet has no table for code_type.
CSeqportUtil::TPair CSeqportUtil_implementation::GetCodeIndexFromTo
(ESeq_code_type code_type)
{
    typedef list<CRef<CSeq_code_table> > Ttables;

    ITERATE (Ttables, i_ct, m_SeqCodeSet->GetCodes()) {
        // Skip tables that describe some other code type
        if ((*i_ct)->GetCode() != code_type) {
            continue;
        }

        TPair range;

        // Default start is 0 unless the table says otherwise
        range.first = 0;
        if ( (*i_ct)->IsSetStart_at() ) {
            range.first = static_cast<TIndex>((*i_ct)->GetStart_at());
        }
        range.second =
            range.first + static_cast<TIndex>((*i_ct)->GetNum() - 1);
        return range;
    }

    // No table matched
    throw CSeqportUtil::CBadType("GetCodeIndexFromTo");
}
// E_Choice front end for GetCodeOrName: converts code_type with
// EChoiceToESeq and defers to the ESeq_code_type overload, passing idx and
// get_code through unchanged.
const string& CSeqportUtil_implementation::GetCodeOrName
(CSeq_data::E_Choice code_type,
 TIndex              idx,
 bool                get_code)
{
    const ESeq_code_type seq_code = EChoiceToESeq(code_type);
    return GetCodeOrName(seq_code, idx, get_code);
}
// Return the string stored in m_IndexString[get_code] for index idx of
// code_type.
//   code_type - code type; slot code_type-1 of the lookup arrays is used
//   idx       - index, offset by the type's m_StartAt value before lookup
//   get_code  - selects which of the two m_IndexString tables is consulted
// Throws CSeqportUtil::CBadType when the selected table is empty, and
// CSeqportUtil::CBadIndex (carrying the offset-adjusted index) when idx is
// out of range.
const string& CSeqportUtil_implementation::GetCodeOrName
(ESeq_code_type code_type,
 TIndex         idx,
 bool           get_code)
{
    // Tables are stored at position (code type - 1)
    const int type_slot = code_type - 1;

    // An empty table means this code type was never loaded
    if (m_IndexString[get_code][type_slot].size() == 0) {
        throw CSeqportUtil::CBadType("GetCodeOrName");
    }

    // Convert idx into a position within the table
    idx -= m_StartAt[type_slot];
    if ( !(idx < m_IndexString[get_code][type_slot].size()) ) {
        throw CSeqportUtil::CBadIndex(idx, "GetCodeOrName");
    }

    return m_IndexString[get_code][type_slot][idx];
}
// E_Choice front end for GetIndex: converts code_type with EChoiceToESeq and
// defers to the ESeq_code_type overload.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndex
(CSeq_data::E_Choice code_type,
 const string&       code)
{
    const ESeq_code_type seq_code = EChoiceToESeq(code_type);
    return GetIndex(seq_code, code);
}
// Get the index stored for code in the m_StringIndex map of code_type.
// Throws CSeqportUtil::CBadType when the map for code_type is empty, and
// CSeqportUtil::CBadSymbol when code is not present in it.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndex
(ESeq_code_type code_type,
 const string&  code)
{
    // Maps are stored at position (code type - 1)
    const int type_slot = code_type - 1;

    // An empty map means this code type was never loaded
    if (m_StringIndex[type_slot].size() == 0) {
        throw CSeqportUtil::CBadType("GetIndex");
    }

    const map<string, TIndex>::const_iterator found =
        m_StringIndex[type_slot].find(code);
    if (found == m_StringIndex[type_slot].end()) {
        throw CSeqportUtil::CBadSymbol(code, "GetIndex");
    }

    return found->second;
}
// E_Choice front end for GetIndexComplement: converts code_type with
// EChoiceToESeq and defers to the ESeq_code_type overload.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndexComplement
(CSeq_data::E_Choice code_type,
 TIndex              idx)
{
    const ESeq_code_type seq_code = EChoiceToESeq(code_type);
    return GetIndexComplement(seq_code, idx);
}
// Return the complement index recorded in m_IndexComplement for idx of
// code_type.
// Throws CSeqportUtil::CBadType when no complement table exists for
// code_type (the table is empty), and CSeqportUtil::CBadIndex (carrying the
// offset-adjusted index) when idx is out of range.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndexComplement
(ESeq_code_type code_type,
 TIndex         idx)
{
    // Tables are stored at position (code type - 1)
    const int type_slot = code_type - 1;

    // Check that complements are available for this code type
    if (m_IndexComplement[type_slot].size() == 0) {
        throw CSeqportUtil::CBadType("GetIndexComplement");
    }

    // Convert idx into a position within the table and range-check it
    idx -= m_StartAt[type_slot];
    if ( !(idx < m_IndexComplement[type_slot].size()) ) {
        throw CSeqportUtil::CBadIndex(idx, "GetIndexComplement");
    }

    return m_IndexComplement[type_slot][idx];
}
// E_Choice front end for GetMapToIndex: converts both code types with
// EChoiceToESeq and defers to the ESeq_code_type overload.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetMapToIndex
(CSeq_data::E_Choice from_type,
 CSeq_data::E_Choice to_type,
 TIndex              from_idx)
{
    return GetMapToIndex(EChoiceToESeq(from_type), EChoiceToESeq(to_type),
                         from_idx);
}
// Map index from_idx of from_type to the corresponding index of to_type.
// Picks the conversion table for the (from_type, to_type) pair and returns
// its entry at from_idx.
// Throws CSeqportUtil::CBadType when no table exists for the pair (this
// includes from_type == to_type), and CSeqportUtil::CBadIndex when
// from_idx, offset by the table's m_StartAt, is not below its m_Size.
CSeqportUtil::TIndex CSeqportUtil_implementation::GetMapToIndex
(ESeq_code_type from_type,
 ESeq_code_type to_type,
 TIndex         from_idx)
{
    CMap_table* Map = 0;

    // Select the table by source type first, then by target type
    switch (from_type) {
    case eSeq_code_type_iupacna:
        if (to_type == eSeq_code_type_ncbi2na)
            Map = m_IupacnaNcbi2na.GetPointer();
        else if (to_type == eSeq_code_type_ncbi4na)
            Map = m_IupacnaNcbi4na.GetPointer();
        break;

    case eSeq_code_type_ncbi4na:
        if (to_type == eSeq_code_type_iupacna)
            Map = m_Ncbi4naIupacna.GetPointer();
        else if (to_type == eSeq_code_type_ncbi2na)
            Map = m_Ncbi4naNcbi2na.GetPointer();
        break;

    case eSeq_code_type_ncbi2na:
        if (to_type == eSeq_code_type_iupacna)
            Map = m_Ncbi2naIupacna.GetPointer();
        else if (to_type == eSeq_code_type_ncbi4na)
            Map = m_Ncbi2naNcbi4na.GetPointer();
        break;

    case eSeq_code_type_iupacaa:
        if (to_type == eSeq_code_type_ncbieaa)
            Map = m_IupacaaNcbieaa.GetPointer();
        else if (to_type == eSeq_code_type_ncbistdaa)
            Map = m_IupacaaNcbistdaa.GetPointer();
        break;

    case eSeq_code_type_ncbieaa:
        if (to_type == eSeq_code_type_iupacaa)
            Map = m_NcbieaaIupacaa.GetPointer();
        else if (to_type == eSeq_code_type_ncbistdaa)
            Map = m_NcbieaaNcbistdaa.GetPointer();
        break;

    case eSeq_code_type_ncbistdaa:
        if (to_type == eSeq_code_type_iupacaa)
            Map = m_NcbistdaaIupacaa.GetPointer();
        else if (to_type == eSeq_code_type_ncbieaa)
            Map = m_NcbistdaaNcbieaa.GetPointer();
        break;

    default:
        // Any other source type has no conversion table
        break;
    }

    // Check that requested map is available
    if (Map == 0) {
        throw CSeqportUtil::CBadType("GetMapToIndex");
    }

    // Check that from_idx is within range of from_type
    if (from_idx - Map->m_StartAt >= static_cast<TIndex>(Map->m_Size)) {
        throw CSeqportUtil::CBadIndex(from_idx - Map->m_StartAt,
                                      "GetMapToIndex");
    }

    // The table is indexed by the raw (not offset-adjusted) index
    return Map->m_Table[from_idx];
}
// Expose the read-only storage behind a CSeq_data.
// Exactly one of *str / *vec is pointed at the underlying container,
// depending on data.Which(); the other is set to 0. For a choice not listed
// below both are left at 0.
//   data - sequence data to inspect
//   str  - receives the string for Iupacna, Iupacaa and Ncbieaa
//   vec  - receives the byte vector for Ncbi2na, Ncbi4na, Ncbi8na, Ncbi8aa
//          and Ncbistdaa
void CSeqportUtil_implementation::x_GetSeqFromSeqData
(const CSeq_data&      data,
 const string**        str,
 const vector<char>**  vec)
    const
{
    // Start from "neither"
    *str = 0;
    *vec = 0;

    switch ( data.Which() ) {
    // Codings stored as a string
    case CSeq_data::e_Iupacna:
        *str = &data.GetIupacna().Get();
        break;
    case CSeq_data::e_Iupacaa:
        *str = &data.GetIupacaa().Get();
        break;
    case CSeq_data::e_Ncbieaa:
        *str = &data.GetNcbieaa().Get();
        break;

    // Codings stored as a byte vector
    case CSeq_data::e_Ncbi2na:
        *vec = &data.GetNcbi2na().Get();
        break;
    case CSeq_data::e_Ncbi4na:
        *vec = &data.GetNcbi4na().Get();
        break;
    case CSeq_data::e_Ncbi8na:
        *vec = &data.GetNcbi8na().Get();
        break;
    case CSeq_data::e_Ncbi8aa:
        *vec = &data.GetNcbi8aa().Get();
        break;
    case CSeq_data::e_Ncbistdaa:
        *vec = &data.GetNcbistdaa().Get();
        break;

    default:
        // Any other choice: leave both outputs at 0
        break;
    } // end of switch statement
}
// Give writable access to the container that holds the sequence inside a
// non-const CSeq_data (mutable counterpart of the const overload).
//
//   data - sequence object to inspect
//   str  - receives the address of the string container when data.Which()
//          is Iupacna, Iupacaa or Ncbieaa; null otherwise
//   vec  - receives the address of the vector<char> container when
//          data.Which() is Ncbi2na, Ncbi4na, Ncbi8na, Ncbi8aa or Ncbistdaa;
//          null otherwise
//
// At most one of *str / *vec is non-null on return; for any other choice
// both are left null.
void CSeqportUtil_implementation::x_GetSeqFromSeqData
(CSeq_data& data,
 string** str,
 vector<char>** vec)
    const
{
    // Clear both outputs first so an unhandled coding reports "no data".
    *str = 0;
    *vec = 0;

    switch ( data.Which() ) {
    // Codings whose container is a string
    case CSeq_data::e_Iupacna:
        *str = &data.SetIupacna().Set();
        return;
    case CSeq_data::e_Iupacaa:
        *str = &data.SetIupacaa().Set();
        return;
    case CSeq_data::e_Ncbieaa:
        *str = &data.SetNcbieaa().Set();
        return;

    // Codings whose container is a vector<char>
    case CSeq_data::e_Ncbi2na:
        *vec = &data.SetNcbi2na().Set();
        return;
    case CSeq_data::e_Ncbi4na:
        *vec = &data.SetNcbi4na().Set();
        return;
    case CSeq_data::e_Ncbi8na:
        *vec = &data.SetNcbi8na().Set();
        return;
    case CSeq_data::e_Ncbi8aa:
        *vec = &data.SetNcbi8aa().Set();
        return;
    case CSeq_data::e_Ncbistdaa:
        *vec = &data.SetNcbistdaa().Set();
        return;

    // Anything else: leave both outputs null
    default:
        return;
    }
}
/////////////////////////////////////////////////////////////////////////////
//  CSeqportUtil_implementation::sm_StrAsnData  --  some very long and ugly string
//
//  Local copy of the seqcode.prt sequence alphabet and conversion table,
//  stored as text ASN.1 (a Seq-code-set value), one array element per line
//  of text.  The array is terminated by a null pointer.
//
//  Editing rules:
//   - Every element must end in "\n".  ASN.1 "--" comments extend to the
//     end of the line, so dropping a line break would comment out whatever
//     follows it.
//   - Double quotes that belong to the ASN.1 text must be written as \".
//   - The closing '}' must remain the last character of the text.
const char* CSeqportUtil_implementation::sm_StrAsnData[] =
{
    "-- This is the set of NCBI sequence code tables\n",
    "-- J.Ostell 10/18/91\n",
    "--\n",
    "\n",
    "Seq-code-set ::= {\n",
    " codes { -- codes\n",
    " { -- IUPACna\n",
    " code iupacna ,\n",
    " num 25 , -- continuous 65-89\n",
    " one-letter TRUE , -- all one letter codes\n",
    " start-at 65 , -- starts with A, ASCII 65\n",
    " table {\n",
    " { symbol \"A\", name \"Adenine\" },\n",
    " { symbol \"B\" , name \"G or T or C\" },\n",
    " { symbol \"C\", name \"Cytosine\" },\n",
    " { symbol \"D\", name \"G or A or T\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"G\", name \"Guanine\" },\n",
    " { symbol \"H\", name \"A or C or T\" } ,\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"K\", name \"G or T\" },\n",
    " { symbol \"\", name \"\"},\n",
    " { symbol \"M\", name \"A or C\" },\n",
    " { symbol \"N\", name \"A or G or C or T\" } ,\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"\", name \"\"},\n",
    " { symbol \"R\", name \"G or A\"},\n",
    " { symbol \"S\", name \"G or C\"},\n",
    " { symbol \"T\", name \"Thymine\"},\n",
    " { symbol \"\", name \"\"},\n",
    " { symbol \"V\", name \"G or C or A\"},\n",
    " { symbol \"W\", name \"A or T\" },\n",
    " { symbol \"\", name \"\"},\n",
    " { symbol \"Y\", name \"T or C\"}\n",
    " } , -- end of table\n",
    " comps { -- complements\n",
    " 84,\n",
    " 86,\n",
    " 71,\n",
    " 72,\n",
    " 69,\n",
    " 70,\n",
    " 67,\n",
    " 68,\n",
    " 73,\n",
    " 74,\n",
    " 77,\n",
    " 76,\n",
    " 75,\n",
    " 78,\n",
    " 79,\n",
    " 80,\n",
    " 81,\n",
    " 89,\n",
    " 83,\n",
    " 65,\n",
    " 85,\n",
    " 66,\n",
    " 87,\n",
    " 88,\n",
    " 82\n",
    " }\n",
    " },\n",
    " { -- IUPACaa\n",
    " code iupacaa ,\n",
    " num 26 , -- continuous 65-90\n",
    " one-letter TRUE , -- all one letter codes\n",
    " start-at 65 , -- starts with A, ASCII 65\n",
    " table {\n",
    " { symbol \"A\", name \"Alanine\" },\n",
    " { symbol \"B\" , name \"Asp or Asn\" },\n",
    " { symbol \"C\", name \"Cysteine\" },\n",
    " { symbol \"D\", name \"Aspartic Acid\" },\n",
    " { symbol \"E\", name \"Glutamic Acid\" },\n",
    " { symbol \"F\", name \"Phenylalanine\" },\n",
    " { symbol \"G\", name \"Glycine\" },\n",
    " { symbol \"H\", name \"Histidine\" } ,\n",
    " { symbol \"I\", name \"Isoleucine\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"K\", name \"Lysine\" },\n",
    " { symbol \"L\", name \"Leucine\" },\n",
    " { symbol \"M\", name \"Methionine\" },\n",
    " { symbol \"N\", name \"Asparagine\" } ,\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"P\", name \"Proline\" },\n",
    " { symbol \"Q\", name \"Glutamine\"},\n",
    " { symbol \"R\", name \"Arginine\"},\n",
    " { symbol \"S\", name \"Serine\"},\n",
    " { symbol \"T\", name \"Threonine\"},\n",
    // NOTE(review): there is no comma after the next literal, so it is
    // concatenated with the one that follows into a single array element.
    " { symbol \"U\", name \"Selenocysteine\"}, -- was empty\n"
    " { symbol \"V\", name \"Valine\"},\n",
    " { symbol \"W\", name \"Tryptophan\" },\n",
    " { symbol \"X\", name \"Undetermined or atypical\"},\n",
    " { symbol \"Y\", name \"Tyrosine\"},\n",
    " { symbol \"Z\", name \"Glu or Gln\" }\n",
    " } -- end of table \n",
    " },\n",
    " { -- IUPACeaa\n",
    " code ncbieaa ,\n",
    " num 49 , -- continuous 42-90\n",
    " one-letter TRUE , -- all one letter codes\n",
    " start-at 42 , -- starts with *, ASCII 42\n",
    " table {\n",
    " { symbol \"*\", name \"Termination\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"-\", name \"Gap\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"\", name \"\" } ,\n",
    " { symbol \"A\", name \"Alanine\" },\n",
    " { symbol \"B\" , name \"Asp or Asn\" },\n",
    " { symbol \"C\", name \"Cysteine\" },\n",
    " { symbol \"D\", name \"Aspartic Acid\" },\n",
    " { symbol \"E\", name \"Glutamic Acid\" },\n",
    " { symbol \"F\", name \"Phenylalanine\" },\n",
    " { symbol \"G\", name \"Glycine\" },\n",
    " { symbol \"H\", name \"Histidine\" } ,\n",
    " { symbol \"I\", name \"Isoleucine\" },\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"K\", name \"Lysine\" },\n",
    " { symbol \"L\", name \"Leucine\" },\n",
    " { symbol \"M\", name \"Methionine\" },\n",
    " { symbol \"N\", name \"Asparagine\" } ,\n",
    " { symbol \"\", name \"\" },\n",
    " { symbol \"P\", name \"Proline\" },\n",
    " { symbol \"Q\", name \"Glutamine\"},\n",
    " { symbol \"R\", name \"Arginine\"},\n",
    " { symbol \"S\", name \"Serine\"},\n",
    " { symbol \"T\", name \"Threonine\"},\n",
    " { symbol \"U\", name \"Selenocysteine\"},\n",
    " { symbol \"V\", name \"Valine\"},\n",
    " { symbol \"W\", name \"Tryptophan\" },\n",
    " { symbol \"X\", name \"Undetermined or atypical\"},\n",
    " { symbol \"Y\", name \"Tyrosine\"},\n",
    " { symbol \"Z\", name \"Glu or Gln\" }\n",
    " } -- end of table\n",
    " },\n",
    " { -- IUPACaa3\n",
    " code iupacaa3 ,\n",
    " num 26 , -- continuous 0-25\n",
    " one-letter FALSE , -- all 3 letter codes\n",
    " table {\n",
    " { symbol \"---\", name \"Gap\" } ,\n",
    " { symbol \"Ala\", name \"Alanine\" },\n",
    " { symbol \"Asx\" , name \"Asp or Asn\" },\n",
    " { symbol \"Cys\", name \"Cysteine\" },\n",
    " { symbol \"Asp\", name \"Aspartic Acid\" },\n",
    " { symbol \"Glu\", name \"Glutamic Acid\" },\n",
    " { symbol \"Phe\", name \"Phenylalanine\" },\n",
    " { symbol \"Gly\", name \"Glycine\" },\n",
    " { symbol \"His\", name \"Histidine\" } ,\n",
    " { symbol \"Ile\", name \"Isoleucine\" },\n",
    " { symbol \"Lys\", name \"Lysine\" },\n",
    " { symbol \"Leu\", name \"Leucine\" },\n",
    " { symbol \"Met\", name \"Methionine\" },\n",
    " { symbol \"Asn\", name \"Asparagine\" } ,\n",
    " { symbol \"Pro\", name \"Proline\" },\n",
    " { symbol \"Gln\", name \"Glutamine\"},\n",
    " { symbol \"Arg\", name \"Arginine\"},\n",
    " { symbol \"Ser\", name \"Serine\"},\n",
    " { symbol \"Thr\", name \"Threonine\"},\n",
    " { symbol \"Val\", name \"Valine\"},\n",
    " { symbol \"Trp\", name \"Tryptophan\" },\n",
    " { symbol \"Xxx\", name \"Undetermined or atypical\"},\n",
    " { symbol \"Tyr\", name \"Tyrosine\"},\n",
    " { symbol \"Glx\", name \"Glu or Gln\" },\n",
    " { symbol \"Sec\", name \"Selenocysteine\"},\n",
    " { symbol \"Ter\", name \"Termination\" } \n",
    " } -- end of table\n",
    " },\n",
    " { -- NCBIstdaa\n",
    " code ncbistdaa ,\n",
    " num 26 , -- continuous 0-25\n",
    " one-letter TRUE , -- all one letter codes\n",
    " table {\n",
    " { symbol \"-\", name \"Gap\" } , -- 0\n",
    " { symbol \"A\", name \"Alanine\" }, -- 1\n",
    " { symbol \"B\" , name \"Asp or Asn\" }, -- 2\n",
    " { symbol \"C\", name \"Cysteine\" }, -- 3\n",
    " { symbol \"D\", name \"Aspartic Acid\" }, -- 4\n",
    " { symbol \"E\", name \"Glutamic Acid\" }, -- 5\n",
    " { symbol \"F\", name \"Phenylalanine\" }, -- 6\n",
    " { symbol \"G\", name \"Glycine\" }, -- 7\n",
    " { symbol \"H\", name \"Histidine\" } , -- 8\n",
    " { symbol \"I\", name \"Isoleucine\" }, -- 9\n",
    " { symbol \"K\", name \"Lysine\" }, -- 10\n",
    " { symbol \"L\", name \"Leucine\" }, -- 11\n",
    " { symbol \"M\", name \"Methionine\" }, -- 12\n",
    " { symbol \"N\", name \"Asparagine\" } , -- 13\n",
    " { symbol \"P\", name \"Proline\" }, -- 14\n",
    " { symbol \"Q\", name \"Glutamine\"}, -- 15\n",
    " { symbol \"R\", name \"Arginine\"}, -- 16\n",
    " { symbol \"S\", name \"Serine\"}, -- 17\n",
    " { symbol \"T\", name \"Threoine\"}, -- 18\n",
    " { symbol \"V\", name \"Valine\"}, -- 19\n",
    " { symbol \"W\", name \"Tryptophan\" }, -- 20\n",
    " { symbol \"X\", name \"Undetermined or atypical\"}, -- 21\n",
    " { symbol \"Y\", name \"Tyrosine\"}, -- 22\n",
    " { symbol \"Z\", name \"Glu or Gln\" }, -- 23\n",
    " { symbol \"U\", name \"Selenocysteine\"}, -- 24 \n",
    " { symbol \"*\", name \"Termination\" } -- 25\n",
    " } -- end of table \n",
    " },\n",
    " { -- NCBI2na\n",
    " code ncbi2na ,\n",
    " num 4 , -- continuous 0-3\n",
    " one-letter TRUE , -- all one letter codes\n",
    " table {\n",
    " { symbol \"A\", name \"Adenine\" },\n",
    " { symbol \"C\", name \"Cytosine\" },\n",
    " { symbol \"G\", name \"Guanine\" },\n",
    " { symbol \"T\", name \"Thymine/Uracil\"}\n",
    " } , -- end of table\n",
    " comps { -- complements\n",
    " 3,\n",
    " 2,\n",
    " 1,\n",
    " 0\n",
    " }\n",
    " },\n",
    " { -- NCBI4na\n",
    " code ncbi4na ,\n",
    " num 16 , -- continuous 0-15\n",
    " one-letter TRUE , -- all one letter codes\n",
    " table {\n",
    " { symbol \"-\", name \"Gap\" } ,\n",
    " { symbol \"A\", name \"Adenine\" },\n",
    " { symbol \"C\", name \"Cytosine\" },\n",
    " { symbol \"M\", name \"A or C\" },\n",
    " { symbol \"G\", name \"Guanine\" },\n",
    " { symbol \"R\", name \"G or A\"},\n",
    " { symbol \"S\", name \"G or C\"},\n",
    " { symbol \"V\", name \"G or C or A\"},\n",
    " { symbol \"T\", name \"Thymine/Uracil\"},\n",
    " { symbol \"W\", name \"A or T\" },\n",
    " { symbol \"Y\", name \"T or C\"} ,\n",
    " { symbol \"H\", name \"A or C or T\" } ,\n",
    " { symbol \"K\", name \"G or T\" },\n",
    " { symbol \"D\", name \"G or A or T\" },\n",
    " { symbol \"B\" , name \"G or T or C\" },\n",
    " { symbol \"N\", name \"A or G or C or T\" }\n",
    " } , -- end of table\n",
    " comps { -- complements\n",
    " 0 ,\n",
    " 8 ,\n",
    " 4 ,\n",
    " 12,\n",
    " 2 ,\n",
    " 10,\n",
    " 6 ,\n",
    " 14,\n",
    " 1 ,\n",
    " 9 ,\n",
    " 5 ,\n",
    " 13,\n",
    " 3 ,\n",
    " 11,\n",
    " 7 ,\n",
    " 15\n",
    " }\n",
    " } \n",
    " }, -- end of codes\n",
    " maps {\n",
    " {\n",
    " from iupacna ,\n",
    " to ncbi2na ,\n",
    " num 25 ,\n",
    " start-at 65 ,\n",
    " table {\n",
    " 0, -- A -> A\n",
    " 1, -- B -> C\n",
    " 1, -- C -> C\n",
    " 2, -- D -> G\n",
    " 255,\n",
    " 255,\n",
    " 2, -- G -> G\n",
    " 0, -- H -> A\n",
    " 255,\n",
    " 255,\n",
    " 2, -- K -> G\n",
    " 255,\n",
    " 1, -- M -> C\n",
    " 0, -- N -> A\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 2, -- R -> G\n",
    " 1, -- S -> C\n",
    " 3, -- T -> T\n",
    " 255,\n",
    " 0, -- V -> A\n",
    " 3, -- W -> T\n",
    " 255,\n",
    " 3 } -- Y -> T\n",
    " }, \n",
    " {\n",
    " from iupacna ,\n",
    " to ncbi4na ,\n",
    " num 25 ,\n",
    " start-at 65 ,\n",
    " table {\n",
    " 1, -- A\n",
    " 14, -- B\n",
    " 2, -- C\n",
    " 13, -- D\n",
    " 255,\n",
    " 255,\n",
    " 4, -- G\n",
    " 11, -- H\n",
    " 255,\n",
    " 255,\n",
    " 12, -- K\n",
    " 255,\n",
    " 3, -- M\n",
    " 15, -- N\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 5, -- R\n",
    " 6, -- S\n",
    " 8, -- T\n",
    " 255,\n",
    " 7, -- V\n",
    " 9, -- W\n",
    " 255,\n",
    " 10 } -- Y\n",
    " }, \n",
    " {\n",
    " from ncbi2na ,\n",
    " to iupacna ,\n",
    " num 4 ,\n",
    " table {\n",
    " 65, -- A\n",
    " 67, -- C\n",
    " 71, -- G\n",
    " 84 } -- T\n",
    " } ,\n",
    " {\n",
    " from ncbi2na ,\n",
    " to ncbi4na ,\n",
    " num 4 ,\n",
    " table {\n",
    " 1, -- A\n",
    " 2, -- C\n",
    " 4, -- G\n",
    " 8 } -- T\n",
    " } , \n",
    " {\n",
    " from ncbi4na ,\n",
    " to iupacna ,\n",
    " num 16 ,\n",
    " table {\n",
    " 78, -- gap -> N\n",
    " 65, -- A\n",
    " 67, -- C\n",
    " 77, -- M\n",
    " 71, -- G\n",
    " 82, -- R\n",
    " 83, -- S\n",
    " 86, -- V\n",
    " 84, -- T\n",
    " 87, -- W\n",
    " 89, -- Y\n",
    " 72, -- H\n",
    " 75, -- K\n",
    " 68, -- D\n",
    " 66, -- B\n",
    " 78 } -- N\n",
    " } ,\n",
    " {\n",
    " from ncbi4na ,\n",
    " to ncbi2na ,\n",
    " num 16 ,\n",
    " table {\n",
    " 3, -- gap -> T\n",
    " 0, -- A -> A\n",
    " 1, -- C -> C\n",
    " 1, -- M -> C\n",
    " 2, -- G -> G\n",
    " 2, -- R -> G\n",
    " 1, -- S -> C\n",
    " 0, -- V -> A\n",
    " 3, -- T -> T\n",
    " 3, -- W -> T\n",
    " 3, -- Y -> T\n",
    " 0, -- H -> A\n",
    " 2, -- K -> G\n",
    " 2, -- D -> G\n",
    " 1, -- B -> C\n",
    " 0 } -- N -> A\n",
    " } ,\n",
    " {\n",
    " from iupacaa ,\n",
    " to ncbieaa ,\n",
    " num 26 ,\n",
    " start-at 65 ,\n",
    " table {\n",
    " 65 , -- they map directly\n",
    " 66 ,\n",
    " 67 ,\n",
    " 68,\n",
    " 69,\n",
    " 70,\n",
    " 71,\n",
    " 72,\n",
    " 73,\n",
    " 255, -- J\n",
    " 75,\n",
    " 76,\n",
    " 77,\n",
    " 78,\n",
    " 255, -- O\n",
    " 80,\n",
    " 81,\n",
    " 82,\n",
    " 83,\n",
    " 84,\n",
    " 85, -- U - was 255\n",
    " 86,\n",
    " 87,\n",
    " 88,\n",
    " 89,\n",
    " 90 }\n",
    " } ,\n",
    " {\n",
    " from ncbieaa ,\n",
    " to iupacaa ,\n",
    " num 49 ,\n",
    " start-at 42 ,\n",
    " table {\n",
    " 88 , -- termination -> X\n",
    " 255,\n",
    " 255,\n",
    " 88, -- Gap -> X\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 65 , -- from here they map directly\n",
    " 66 ,\n",
    " 67 ,\n",
    " 68,\n",
    " 69,\n",
    " 70,\n",
    " 71,\n",
    " 72,\n",
    " 73,\n",
    " 255, -- J\n",
    " 75,\n",
    " 76,\n",
    " 77,\n",
    " 78,\n",
    " 255, -- O\n",
    " 80,\n",
    " 81,\n",
    " 82,\n",
    " 83,\n",
    " 84,\n",
    " 85, -- U was -> C\n",
    " 86,\n",
    " 87,\n",
    " 88,\n",
    " 89,\n",
    " 90 }\n",
    " } ,\n",
    " {\n",
    " from iupacaa ,\n",
    " to ncbistdaa ,\n",
    " num 26 ,\n",
    " start-at 65 ,\n",
    " table {\n",
    " 1 , -- they map directly\n",
    " 2 ,\n",
    " 3 ,\n",
    " 4,\n",
    " 5,\n",
    " 6,\n",
    " 7,\n",
    " 8,\n",
    " 9,\n",
    " 255, -- J\n",
    " 10,\n",
    " 11,\n",
    " 12,\n",
    " 13,\n",
    " 255, -- O\n",
    " 14,\n",
    " 15,\n",
    " 16,\n",
    " 17,\n",
    " 18,\n",
    " 24, -- U - was 255\n",
    " 19,\n",
    " 20,\n",
    " 21,\n",
    " 22,\n",
    " 23 }\n",
    " } ,\n",
    " {\n",
    " from ncbieaa ,\n",
    " to ncbistdaa ,\n",
    " num 49 ,\n",
    " start-at 42 ,\n",
    " table {\n",
    " 25, -- termination\n",
    " 255,\n",
    " 255,\n",
    " 0, -- Gap\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 255,\n",
    " 1 , -- they map directly\n",
    " 2 ,\n",
    " 3 ,\n",
    " 4,\n",
    " 5,\n",
    " 6,\n",
    " 7,\n",
    " 8,\n",
    " 9,\n",
    " 255, -- J\n",
    " 10,\n",
    " 11,\n",
    " 12,\n",
    " 13,\n",
    " 255, -- O\n",
    " 14,\n",
    " 15,\n",
    " 16,\n",
    " 17,\n",
    " 18,\n",
    " 24, -- U\n",
    " 19,\n",
    " 20,\n",
    " 21,\n",
    " 22,\n",
    " 23 }\n",
    " } ,\n",
    " {\n",
    " from ncbistdaa ,\n",
    " to ncbieaa ,\n",
    " num 26 ,\n",
    " table {\n",
    " 45 , -- \"-\"\n",
    " 65 , -- they map directly with holes for O and J\n",
    " 66 ,\n",
    " 67 ,\n",
    " 68,\n",
    " 69,\n",
    " 70,\n",
    " 71,\n",
    " 72,\n",
    " 73,\n",
    " 75,\n",
    " 76,\n",
    " 77,\n",
    " 78,\n",
    " 80,\n",
    " 81,\n",
    " 82,\n",
    " 83,\n",
    " 84,\n",
    " 86,\n",
    " 87,\n",
    " 88,\n",
    " 89,\n",
    " 90,\n",
    " 85, -- U\n",
    " 42} -- *\n",
    " } ,\n",
    " {\n",
    " from ncbistdaa ,\n",
    " to iupacaa ,\n",
    " num 26 ,\n",
    " table {\n",
    " 255 , -- \"-\"\n",
    " 65 , -- they map directly with holes for O and J\n",
    " 66 ,\n",
    " 67 ,\n",
    " 68,\n",
    " 69,\n",
    " 70,\n",
    " 71,\n",
    " 72,\n",
    " 73,\n",
    " 75,\n",
    " 76,\n",
    " 77,\n",
    " 78,\n",
    " 80,\n",
    " 81,\n",
    " 82,\n",
    " 83,\n",
    " 84,\n",
    " 86,\n",
    " 87,\n",
    " 88,\n",
    " 89,\n",
    " 90,\n",
    " 85, -- U - was 88\n",
    " 255} -- *\n",
    " } \n",
    " \n",
    " } -- end of maps\n",
    "-- end of seq-code-set -- }", // make sure '}' is last symbol of ASN text
    0 // to indicate that there is no more data
};
END_objects_SCOPE
END_NCBI_SCOPE
/*
* ---------------------------------------------------------------------------
* $Log: seqport_util.cpp,v $
* Revision 1000.4 2004/06/01 19:33:29 gouriano
* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.24
*
* Revision 6.24 2004/05/19 17:25:14 gorelenk
* Added include of PCH - ncbi_pch.hpp
*
* Revision 6.23 2004/03/30 21:25:09 shomrat
* Do not attempt to pack protein sequences
*
* Revision 6.22 2004/01/22 19:13:26 shomrat
* fixed bug in complement tables
*
* Revision 6.21 2003/11/28 19:01:54 vasilche
* Avoid calling CStreamUtils::Pushback() when constructing objects from text ASN.
* Fixed warnings about conversion char -> unsigned char.
*
* Revision 6.20 2003/11/21 14:45:03 grichenk
* Replaced runtime_error with CException
*
* Revision 6.19 2003/11/06 22:15:58 shomrat
* fixed behavior for default length value
*
* Revision 6.18 2003/11/06 16:12:32 shomrat
* changed seqport_util to use sequtil
*
* Revision 6.17 2003/06/04 17:03:11 rsmith
* Move static mutex out of function to work around CW complex initialization bug.
*
* Revision 6.16 2003/03/11 15:53:25 kuznets
* iterate -> ITERATE
*
* Revision 6.15 2003/01/30 22:50:30 kans
* U (selenocysteine) is now valid in IUPAC alphabet
*
* Revision 6.14 2002/09/19 20:05:44 vasilche
* Safe initialization of static mutexes
*
* Revision 6.13 2002/09/13 18:34:40 dicuccio
* Fixed problem with static object instantiation and type information.
* Broke the Seq-code-set ASN.1 blob into more easily editable lines (kans).
*
* Revision 6.12 2002/05/15 17:57:03 ucko
* Make the recently introduced tables STL vectors rather than primitive
* arrays to work around a GCC 3.0.4 optimizer bug.
*
* Revision 6.11 2002/05/14 15:15:16 clausen
* Added IsCodeAvailable, GetCodeIndexFromTo, GetName, GetIndexComplement, GetMapToIndex
*
* Revision 6.10 2002/05/03 21:28:14 ucko
* Introduce T(Signed)SeqPos.
*
* Revision 6.9 2002/04/25 19:37:03 clausen
* Fixed bug in MapNcbi2naToNcbi4na that caused corruption of out_seq
*
* Revision 6.8 2002/03/27 19:53:18 grichenk
* Fixed CR/LF problem in the source
*
* Revision 6.7 2002/01/12 07:40:22 vakatov
* Fixed multiple dangerous typos ('&' instead of '&&' in IFs)
*
* Revision 6.6 2002/01/10 19:21:34 clausen
* Added GetIupacaa3, GetCode, and GetIndex
*
* Revision 6.5 2001/10/17 18:35:33 clausen
* Fixed machine dependencies in InitFastNcbi4naIupacna and InitFastNcbi2naNcbi4na
*
* Revision 6.4 2001/10/17 13:04:30 clausen
* Fixed InitFastNcbi2naIupacna to remove hardware dependency
*
* Revision 6.3 2001/09/07 14:16:50 ucko
* Cleaned up external interface.
*
* Revision 6.2 2001/09/06 20:43:32 ucko
* Fix iterator types (caught by gcc 3.0.1).
*
* Revision 6.1 2001/08/24 00:34:23 vakatov
* Initial revision
*
* ===========================================================================
*/