seqport_util.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:201k
- // Allocate memory for out_seq
- out_seq_data.resize(uLength);
- // Get iterator for out_seq_data
- string::iterator i_out = out_seq_data.begin();
- // Declare iterator for in_seq_data and determine begin and end
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- vector<char>::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input and convert to output
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- *(i_out++) =
- m_NcbistdaaNcbieaa->m_Table[static_cast<unsigned char>(*i_in)];
- return uLength;
- }
- // Function to convert ncbistdaa (byte) to iupacaa (byte)
- TSeqPos CSeqportUtil_implementation::MapNcbistdaaToIupacaa
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
- // Get read & write reference to out_seq data
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacaa().Set();
- // If uBeginIdx beyond end of in_seq, return
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- // Adjust uBeginIdx and uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Allocate memory for out_seq
- out_seq_data.resize(uLength);
- // Get iterator for out_seq_data
- string::iterator i_out = out_seq_data.begin();
- // Declare iterator for in_seq_data and determine begin and end
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- vector<char>::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input and convert to output
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(i_out++)) =
- m_NcbistdaaIupacaa->m_Table[static_cast<unsigned char>(*i_in)];
- return uLength;
- }
- */
- // Fast validation of iupacna sequence
- bool CSeqportUtil_implementation::FastValidateIupacna
- (const CSeq_data& in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacna().Get();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return true;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Fast Validation
- unsigned char ch = 'x00';
- for(itor = b_itor; itor != e_itor; ++itor)
- ch |= m_Iupacna->m_Table[static_cast<unsigned char>(*itor)];
- // Return true if valid, otherwise false
- return (ch != 255);
- }
- bool CSeqportUtil_implementation::FastValidateNcbieaa
- (const CSeq_data& in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetNcbieaa().Get();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return true;
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return true;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Fast Validation
- unsigned char ch = 'x00';
- for(itor = b_itor; itor != e_itor; ++itor)
- ch |= m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)];
- // Return true if valid, otherwise false
- return (ch != 255);
- }
- bool CSeqportUtil_implementation::FastValidateNcbistdaa
- (const CSeq_data& in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return true;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- vector<char>::const_iterator itor;
- vector<char>::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- vector<char>::const_iterator e_itor = b_itor + uLength;
- // Perform Fast Validation
- unsigned char ch = 'x00';
- for(itor = b_itor; itor != e_itor; ++itor)
- ch |= m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)];
- // Return true if valid, otherwise false
- return (ch != 255);
- }
- bool CSeqportUtil_implementation::FastValidateIupacaa
- (const CSeq_data& in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacaa().Get();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return true;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Fast Validation
- unsigned char ch = 'x00';
- for(itor=b_itor; itor!=e_itor; ++itor)
- ch |= m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)];
- // Return true if valid, otherwise false
- return (ch != 255);
- }
- void CSeqportUtil_implementation::ValidateIupacna
- (const CSeq_data& in_seq,
- vector<TSeqPos>* badIdx,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacna().Get();
- // clear out_indices
- badIdx->clear();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Validation
- TSeqPos nIdx = uBeginIdx;
- for(itor = b_itor; itor != e_itor; ++itor)
- if(m_Iupacna->m_Table[static_cast<unsigned char>(*itor)] == char(255))
- badIdx->push_back(nIdx++);
- else
- nIdx++;
- // Return list of bad indices
- return;
- }
- void CSeqportUtil_implementation::ValidateNcbieaa
- (const CSeq_data& in_seq,
- vector<TSeqPos>* badIdx,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetNcbieaa().Get();
- // clear badIdx
- badIdx->clear();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Validation
- TSeqPos nIdx = uBeginIdx;
- for(itor = b_itor; itor != e_itor; ++itor)
- if(m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
- badIdx->push_back(nIdx++);
- else
- nIdx++;
- // Return vector of bad indices
- return;
- }
- void CSeqportUtil_implementation::ValidateNcbistdaa
- (const CSeq_data& in_seq,
- vector<TSeqPos>* badIdx,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
- // Create a vector to return
- badIdx->clear();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- vector<char>::const_iterator itor;
- vector<char>::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- vector<char>::const_iterator e_itor = b_itor + uLength;
- // Perform Validation
- TSeqPos nIdx = uBeginIdx;
- for(itor=b_itor; itor!=e_itor; ++itor)
- if(m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)]==char(255))
- badIdx->push_back(nIdx++);
- else
- nIdx++;
- // Return vector of bad indices
- return;
- }
- void CSeqportUtil_implementation::ValidateIupacaa
- (const CSeq_data& in_seq,
- vector<TSeqPos>* badIdx,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacaa().Get();
- // Create a vector to return
- badIdx->clear();
- // Check that uBeginIdx is not beyond end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return;
- // Adjust uBeginIdx, uLength
- Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
- // Declare in iterator on in_seq and determine begin and end
- string::const_iterator itor;
- string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
- string::const_iterator e_itor = b_itor + uLength;
- // Perform Validation
- TSeqPos nIdx = uBeginIdx;
- for(itor=b_itor; itor!=e_itor; ++itor)
- if(m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
- badIdx->push_back(nIdx++);
- else
- nIdx++;
- // Return vector of bad indices
- return;
- }
- // Function to make copy of ncbi2na type sequences
- TSeqPos CSeqportUtil_implementation::GetNcbi2naCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
- // Get reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbi2na().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= 4 * in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (4*in_seq_data.size() )) )
- uLength = 4*in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- if((uLength % 4) == 0)
- out_seq_data.resize(uLength/4);
- else
- out_seq_data.resize(uLength/4 + 1);
- // Get iterator on out_seq_data
- vector<char>::iterator i_out = out_seq_data.begin() - 1;
- // Calculate amounts to shift bits
- unsigned int lShift, rShift;
- lShift = 2*(uBeginIdx % 4);
- rShift = 8 - lShift;
- // Get interators on in_seq
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin =
- in_seq_data.begin() + uBeginIdx/4;
- // Determine number of input bytes to process
- SIZE_TYPE uNumBytes = uLength/4;
- if((uLength % 4) != 0)
- ++uNumBytes;
- // Prevent access beyond end of in_seq_data
- bool bDoLastByte = false;
- if((uBeginIdx/4 + uNumBytes) >= in_seq_data.size())
- {
- uNumBytes = in_seq_data.size() - uBeginIdx/4 - 1;
- bDoLastByte = true;
- }
- vector<char>::const_iterator i_in_end = i_in_begin + uNumBytes;
- // Loop through input sequence and copy to output sequence
- if(lShift > 0)
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) =
- ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
- else
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- // Handle last input byte if necessary
- if(bDoLastByte)
- (*(++i_out)) = (*i_in) << lShift;
- return uLength;
- }
- // Function to make copy of ncbi4na type sequences
- TSeqPos CSeqportUtil_implementation::GetNcbi4naCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
- // Get reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= 2 * in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (2*in_seq_data.size() )) )
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- if((uLength % 2) == 0)
- out_seq_data.resize(uLength/2);
- else
- out_seq_data.resize(uLength/2 + 1);
- // Get iterator on out_seq_data
- vector<char>::iterator i_out = out_seq_data.begin() - 1;
- // Calculate amounts to shift bits
- unsigned int lShift, rShift;
- lShift = 4*(uBeginIdx % 2);
- rShift = 8 - lShift;
- // Get interators on in_seq
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin =
- in_seq_data.begin() + uBeginIdx/2;
- // Determine number of input bytes to process
- SIZE_TYPE uNumBytes = uLength/2;
- if((uLength % 2) != 0)
- ++uNumBytes;
- // Prevent access beyond end of in_seq_data
- bool bDoLastByte = false;
- if((uBeginIdx/2 + uNumBytes) >= in_seq_data.size())
- {
- uNumBytes = in_seq_data.size() - uBeginIdx/2 - 1;
- bDoLastByte = true;
- }
- vector<char>::const_iterator i_in_end = i_in_begin + uNumBytes;
- // Loop through input sequence and copy to output sequence
- if(lShift > 0)
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) =
- ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
- else
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- // Handle last input byte
- if(bDoLastByte)
- (*(++i_out)) = (*i_in) << lShift;
- return uLength;
- }
- // Function to make copy of iupacna type sequences
- TSeqPos CSeqportUtil_implementation::GetIupacnaCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacna().Set();
- // Get reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacna().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
- uLength = in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- out_seq_data.resize(uLength);
- // Get iterator on out_seq_data
- string::iterator i_out = out_seq_data.begin() - 1;
- // Get interators on in_seq
- string::const_iterator i_in;
- string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- string::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input sequence and copy to output sequence
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- return uLength;
- }
- // Function to make copy of ncbieaa type sequences
- TSeqPos CSeqportUtil_implementation::GetNcbieaaCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- string& out_seq_data = out_seq->SetNcbieaa().Set();
- // Get reference to in_seq data
- const string& in_seq_data = in_seq.GetNcbieaa().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
- uLength = in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- out_seq_data.resize(uLength);
- // Get iterator on out_seq_data
- string::iterator i_out = out_seq_data.begin() - 1;
- // Get interators on in_seq
- string::const_iterator i_in;
- string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- string::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input sequence and copy to output sequence
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- return uLength;
- }
- // Function to make copy of ncbistdaa type sequences
- TSeqPos CSeqportUtil_implementation::GetNcbistdaaCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
- // Get reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
- uLength = in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- out_seq_data.resize(uLength);
- // Get iterator on out_seq_data
- vector<char>::iterator i_out = out_seq_data.begin() - 1;
- // Get interators on in_seq
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- vector<char>::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input sequence and copy to output sequence
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- return uLength;
- }
- // Function to make copy of iupacaa type sequences
- TSeqPos CSeqportUtil_implementation::GetIupacaaCopy
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get reference to out_seq data
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacaa().Set();
- // Get reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacaa().Get();
- // Return if uBeginIdx is after end of in_seq
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- // Set uLength to actual valid length in out_seq
- if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
- uLength = in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq data
- out_seq_data.resize(uLength);
- // Get iterator on out_seq_data
- string::iterator i_out = out_seq_data.begin() - 1;
- // Get interators on in_seq
- string::const_iterator i_in;
- string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- string::const_iterator i_in_end = i_in_begin + uLength;
- // Loop through input sequence and copy to output sequence
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*(++i_out)) = (*i_in);
- return uLength;
- }
- // Function to adjust uBeginIdx to lie on an in_seq byte boundary
- // and uLength to lie on on an out_seq byte boundary. Returns
- // overhang
- TSeqPos CSeqportUtil_implementation::Adjust
- (TSeqPos* uBeginIdx,
- TSeqPos* uLength,
- TSeqPos uInSeqBytes,
- TSeqPos uInSeqsPerByte,
- TSeqPos uOutSeqsPerByte)
- const
- {
- // Adjust uBeginIdx and uLength to acceptable values
- // If uLength = 0, assume convert to end of sequence
- if(*uLength == 0)
- *uLength = uInSeqsPerByte * uInSeqBytes;
- // Ensure that uBeginIdx does not start at or after end of in_seq_data
- if(*uBeginIdx >= uInSeqsPerByte * uInSeqBytes)
- *uBeginIdx = uInSeqsPerByte * uInSeqBytes - uInSeqsPerByte;
- // Ensure that uBeginIdx is a multiple of uInSeqsPerByte and adjust uLength
- *uLength += *uBeginIdx % uInSeqsPerByte;
- *uBeginIdx = uInSeqsPerByte * (*uBeginIdx/uInSeqsPerByte);
- // Adjust uLength so as not to go beyond end of in_seq_data
- if(*uLength > uInSeqsPerByte * uInSeqBytes - *uBeginIdx)
- *uLength = uInSeqsPerByte * uInSeqBytes - *uBeginIdx;
- // Adjust uLength down to multiple of uOutSeqsPerByte
- // and calculate overhang (overhang handled separately at end)
- TSeqPos uOverhang = *uLength % uOutSeqsPerByte;
- *uLength = uOutSeqsPerByte * (*uLength / uOutSeqsPerByte);
- return uOverhang;
- }
- // Loops through an ncbi4na input sequence and determines
- // the ambiguities that would result from conversion to an ncbi2na sequence
- // On return, out_seq contains the ncbi4na bases that become ambiguous and
- // out_indices contains the indices of the abiguous bases in in_seq
- TSeqPos CSeqportUtil_implementation::GetAmbigs_ncbi4na_ncbi2na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- vector<TSeqPos>* out_indices,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
- // Get read & write reference to out_seq data
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
- // Adjust uBeginIdx and uLength, if necessary
- if(uBeginIdx >= 2*in_seq_data.size())
- return 0;
- if((uLength == 0) || (((uBeginIdx + uLength) > 2*in_seq_data.size())))
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // Save uBeginIdx and adjust uBeginIdx = 0 mod 2
- TSeqPos uBeginSav = uBeginIdx;
- TSeqPos uLenSav = uLength;
- uLength += uBeginIdx % 2;
- uBeginIdx = 2*(uBeginIdx/2);
- // Allocate memory for out_seq_data and out_indices
- // Note, these will be shrunk at the end to correspond
- // to actual memory needed. Note, in test cases, over 50% of the
- // time spent in this method is spent in the next two
- // statements and 3/4 of that is spent in the second statement.
- out_seq_data.resize(uLength/2 + (uLength % 2));
- out_indices->resize(uLength);
- // Variable to track number of ambigs
- TSeqPos uNumAmbigs = 0;
- // Get iterators to input sequence
- vector<char>::const_iterator i_in;
- vector<char>::const_iterator i_in_begin =
- in_seq_data.begin() + uBeginIdx/2;
- vector<char>::const_iterator i_in_end =
- i_in_begin + uLength/2 + (uLength % 2);
- // Get iterators to out_seq_data and out_indices
- vector<char>::iterator i_out_seq = out_seq_data.begin();
- vector<TSeqPos>::iterator i_out_idx = out_indices->begin();
- // Index of current input seq base
- TSeqPos uIdx = uBeginIdx;
- // Loop through input sequence looking for ambiguities
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in) {
- switch (m_DetectAmbigNcbi4naNcbi2na->m_Table
- [static_cast<unsigned char>(*i_in)]) {
- case 1: // Low order input nible ambiguous
- // Put low order input nible in low order output nible
- if(uNumAmbigs & 1)
- {
- (*i_out_seq) |= (*i_in) & 'x0f';
- ++i_out_seq;
- }
- // Put low order input nible in high order output nible
- else
- (*i_out_seq) = (*i_in) << 4;
- // Record input index that was ambiguous
- (*i_out_idx) = uIdx + 1;
- ++i_out_idx;
- // Increment number of ambiguities
- uNumAmbigs++;
- break;
- case 2: // High order input nible ambiguous
- // Put high order input nible in low order output nible
- if(uNumAmbigs & 1)
- {
- (*i_out_seq) |= ((*i_in) >> 4) & 'x0f';
- ++i_out_seq;
- }
- // Put high order input nible in high order output nible
- else
- (*i_out_seq) = (*i_in) & 'xf0';
- // Record input index that was ambiguous
- (*i_out_idx) = uIdx;
- ++i_out_idx;
- // Increment number of ambiguities
- uNumAmbigs++;
- break;
- case 3: // Both input nibles ambiguous
- // Put high order input nible in low order
- // output nible, move to the next output byte
- // and put the low order input nibble in the
- // high order output nible.
- if(uNumAmbigs & 1)
- {
- (*i_out_seq) |= ((*i_in) >> 4) & 'x0f';
- (*(++i_out_seq)) = (*i_in) << 4;
- }
- // Put high order input nible in high order
- // output nible, put low order input nible
- // in low order output nible, and move to
- // next output byte
- else
- {
- (*i_out_seq) = (*i_in);
- ++i_out_seq;
- }
- // Record indices that were ambiguous
- (*i_out_idx) = uIdx;
- (*(++i_out_idx)) = uIdx + 1;
- ++i_out_idx;
- // Increment the number of ambiguities
- uNumAmbigs+=2;
- break;
- }
- // Increment next input byte.
- uIdx += 2;
- }
- // Shrink out_seq_data and out_indices to actual sizes needed
- out_indices->resize(uNumAmbigs);
- out_seq_data.resize(uNumAmbigs/2 + uNumAmbigs % 2);
- // Check to ensure that ambigs outside of requested range are not included
- TSeqPos uKeepBeg = 0;
- TSeqPos uKeepLen = 0;
- if((*out_indices)[0] < uBeginSav)
- {
- uKeepBeg = 1;
- out_indices->erase(out_indices->begin(), out_indices->begin() + 1);
- }
- if((*out_indices)[out_indices->size()-1] >= uBeginSav + uLenSav)
- {
- out_indices->pop_back();
- uKeepLen = out_indices->size();
- }
- if((uKeepBeg != 0) || (uKeepLen != 0))
- uNumAmbigs = KeepNcbi4na(out_seq, uKeepBeg, uKeepLen);
- return uNumAmbigs;
- }
- // Loops through an iupacna input sequence and determines
- // the ambiguities that would result from conversion to an ncbi2na sequence.
- // On return, out_seq contains the iupacna bases that become ambiguous and
- // out_indices contains the indices of the abiguous bases in in_seq. The
- // return is the number of ambiguities found.
- TSeqPos CSeqportUtil_implementation::GetAmbigs_iupacna_ncbi2na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- vector<TSeqPos>* out_indices,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get read-only reference to in_seq data
- const string& in_seq_data = in_seq.GetIupacna().Get();
- // Get read & write reference to out_seq data
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacna().Set();
- // Validate/adjust uBeginIdx and uLength
- if(uBeginIdx >= in_seq_data.size())
- return 0;
- if((uLength == 0) || ((uBeginIdx + uLength) > in_seq_data.size()))
- uLength = in_seq_data.size() - uBeginIdx;
- // Allocate memory for out_seq_data and out_indices
- // Note, these will be shrunk at the end to correspond
- // to actual memory needed.
- out_seq_data.resize(uLength);
- out_indices->resize(uLength);
- // Variable to track number of ambigs
- TSeqPos uNumAmbigs = 0;
- // Get iterators to input sequence
- string::const_iterator i_in;
- string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
- string::const_iterator i_in_end = i_in_begin + uLength;
- // Get iterators to out_seq_data and out_indices
- string::iterator i_out_seq = out_seq_data.begin();
- vector<TSeqPos>::iterator i_out_idx = out_indices->begin();
- // Index of current input seq base
- TSeqPos uIdx = uBeginIdx;
- // Loop through input sequence looking for ambiguities
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- {
- if(m_DetectAmbigIupacnaNcbi2na->m_Table
- [static_cast<unsigned char>(*i_in)] == 1)
- {
- (*i_out_seq) = (*i_in);
- ++i_out_seq;
- (*i_out_idx) = uIdx;
- ++i_out_idx;
- ++uNumAmbigs;
- }
- ++uIdx;
- }
- out_seq_data.resize(uNumAmbigs);
- out_indices->resize(uNumAmbigs);
- return uNumAmbigs;
- }
- // Method to implement Keep for Ncbi2na. Returns length of
- // kept sequence
- TSeqPos CSeqportUtil_implementation::KeepNcbi2na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
- // If uBeginIdx past the end of in_seq, return empty in_seq
- if(uBeginIdx >= in_seq_data.size()*4)
- {
- in_seq_data.clear();
- return 0;
- }
- // If uLength == 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = 4*in_seq_data.size() - uBeginIdx;
- // If uLength goes beyond the end of the sequence, trim
- // it back to the end of the sequence
- if(uLength > (4*in_seq_data.size() - uBeginIdx))
- uLength = 4*in_seq_data.size() - uBeginIdx;
- // If entire sequence is being requested, just return
- if((uBeginIdx == 0) && (uLength >= 4*in_seq_data.size()))
- return uLength;
- // Determine index in in_seq_data that holds uBeginIdx residue
- TSeqPos uStart = uBeginIdx/4;
- // Determine index within start byte
- TSeqPos uStartInByte = 2 * (uBeginIdx % 4);
- // Calculate masks
- unsigned char rightMask = 0xff << uStartInByte;
- unsigned char leftMask = ~rightMask;
- // Determine index in in_seq_data that holds uBeginIdx + uLength
- // residue
- TSeqPos uEnd = (uBeginIdx + uLength - 1)/4;
- // Get iterator for writting
- vector<char>::iterator i_write;
- // Determine begin and end of read
- vector<char>::iterator i_read = in_seq_data.begin() + uStart;
- vector<char>::iterator i_read_end = in_seq_data.begin() + uEnd;
- // Loop through in_seq_data and copy data of desire
- // sub sequence to begining of in_seq_data
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
- (*i_write) = (((*i_read) << uStartInByte) | leftMask) &
- (((*(i_read+1)) >> (8-uStartInByte)) | rightMask);
- ++i_read;
- }
- // Handle last byte
- (*i_write) = (*i_read) << uStartInByte;
- // Shrink in_seq to to size needed
- TSeqPos uSize = uLength/4;
- if((uLength % 4) != 0)
- uSize++;
- in_seq_data.resize(uSize);
- return uLength;
- }
- // Method to implement Keep for Ncbi4na. Returns length of
- // kept sequence.
- TSeqPos CSeqportUtil_implementation::KeepNcbi4na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
- // If uBeginIdx past the end of in_seq, return empty in_seq
- if(uBeginIdx >= in_seq_data.size()*2)
- {
- in_seq_data.clear();
- return 0;
- }
- // If uLength == 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // If uLength goes beyond the end of the sequence, trim
- // it back to the end of the sequence
- if(uLength > (2*in_seq_data.size() - uBeginIdx))
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // If entire sequence is being requested, just return
- if((uBeginIdx == 0) && (uLength >= 2*in_seq_data.size()))
- return uLength;
- // Determine index in in_seq_data that holds uBeginIdx residue
- TSeqPos uStart = uBeginIdx/2;
- // Determine index within start byte
- unsigned int uStartInByte = 4 * (uBeginIdx % 2);
- // Calculate masks
- unsigned char rightMask = 0xff << uStartInByte;
- unsigned char leftMask = ~rightMask;
- // Determine index in in_seq_data that holds uBeginIdx + uLength
- // residue
- TSeqPos uEnd = (uBeginIdx + uLength - 1)/2;
- // Get iterator for writting
- vector<char>::iterator i_write;
- // Determine begin and end of read
- vector<char>::iterator i_read = in_seq_data.begin() + uStart;
- vector<char>::iterator i_read_end = in_seq_data.begin() + uEnd;
- // Loop through in_seq_data and copy data of desire
- // sub sequence to begining of in_seq_data
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
- (*i_write) = (((*i_read) << uStartInByte) | leftMask) &
- (((*(i_read+1)) >> (8-uStartInByte)) | rightMask);
- ++i_read;
- }
- // Handle last byte
- (*i_write) = (*i_read) << uStartInByte;
- // Shrink in_seq to to size needed
- TSeqPos uSize = uLength/2;
- if((uLength % 2) != 0)
- uSize++;
- in_seq_data.resize(uSize);
- return uLength;
- }
- // Method to implement Keep for Iupacna. Return length
- // of kept sequence
- TSeqPos CSeqportUtil_implementation::KeepIupacna
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- string& in_seq_data = in_seq->SetIupacna().Set();
- // If uBeginIdx past end of in_seq, return empty in_seq
- if(uBeginIdx >= in_seq_data.size())
- {
- in_seq_data.erase();
- return 0;
- }
- // If uLength is 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = in_seq_data.size() - uBeginIdx;
- // Check that uLength does not go beyond end of in_seq
- if((uBeginIdx + uLength) > in_seq_data.size())
- uLength = in_seq_data.size() - uBeginIdx;
- // If uBeginIdx == 0 and uLength == in_seq_data.size()
- // just return as the entire sequence is being requested
- if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
- return uLength;
- // Get two iterators on in_seq, one read and one write
- string::iterator i_read;
- string::iterator i_write;
- // Determine begin and end of read
- i_read = in_seq_data.begin() + uBeginIdx;
- string::iterator i_read_end = i_read + uLength;
- // Loop through in_seq for uLength bases
- // and shift uBeginIdx to beginning
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write)
- {
- (*i_write) = (*i_read);
- ++i_read;
- }
- // Resize in_seq_data to uLength
- in_seq_data.resize(uLength);
- return uLength;
- }
- // Method to implement Keep for Ncbieaa
- TSeqPos CSeqportUtil_implementation::KeepNcbieaa
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- string& in_seq_data = in_seq->SetNcbieaa().Set();
- // If uBeginIdx past end of in_seq, return empty in_seq
- if(uBeginIdx >= in_seq_data.size())
- {
- in_seq_data.erase();
- return 0;
- }
- // If uLength is 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = in_seq_data.size() - uBeginIdx;
- // Check that uLength does not go beyond end of in_seq
- if((uBeginIdx + uLength) > in_seq_data.size())
- uLength = in_seq_data.size() - uBeginIdx;
- // If uBeginIdx == 0 and uLength == in_seq_data.size()
- // just return as the entire sequence is being requested
- if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
- return uLength;
- // Get two iterators on in_seq, one read and one write
- string::iterator i_read;
- string::iterator i_write;
- // Determine begin and end of read
- i_read = in_seq_data.begin() + uBeginIdx;
- string::iterator i_read_end = i_read + uLength;
- // Loop through in_seq for uLength bases
- // and shift uBeginIdx to beginning
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
- (*i_write) = (*i_read);
- ++i_read;
- }
- // Resize in_seq_data to uLength
- in_seq_data.resize(uLength);
- return uLength;
- }
- // Method to implement Keep for Ncbistdaa
- TSeqPos CSeqportUtil_implementation::KeepNcbistdaa
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- vector<char>& in_seq_data = in_seq->SetNcbistdaa().Set();
- // If uBeginIdx past end of in_seq, return empty in_seq
- if(uBeginIdx >= in_seq_data.size())
- {
- in_seq_data.clear();
- return 0;
- }
- // If uLength is 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = in_seq_data.size() - uBeginIdx;
- // Check that uLength does not go beyond end of in_seq
- if((uBeginIdx + uLength) > in_seq_data.size())
- uLength = in_seq_data.size() - uBeginIdx;
- // If uBeginIdx == 0 and uLength == in_seq_data.size()
- // just return as the entire sequence is being requested
- if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
- return uLength;
- // Get two iterators on in_seq, one read and one write
- vector<char>::iterator i_read;
- vector<char>::iterator i_write;
- // Determine begin and end of read
- i_read = in_seq_data.begin() + uBeginIdx;
- vector<char>::iterator i_read_end = i_read + uLength;
- // Loop through in_seq for uLength bases
- // and shift uBeginIdx to beginning
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
- (*i_write) = (*i_read);
- ++i_read;
- }
- // Resize in_seq_data to uLength
- in_seq_data.resize(uLength);
- return uLength;
- }
- // Method to implement Keep for Iupacaa
- TSeqPos CSeqportUtil_implementation::KeepIupacaa
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq
- string& in_seq_data = in_seq->SetIupacaa().Set();
- // If uBeginIdx past end of in_seq, return empty in_seq
- if (uBeginIdx >= in_seq_data.size()) {
- in_seq_data.erase();
- return 0;
- }
- // If uLength is 0, Keep from uBeginIdx to end of in_seq
- if(uLength == 0)
- uLength = in_seq_data.size() - uBeginIdx;
- // Check that uLength does not go beyond end of in_seq
- if((uBeginIdx + uLength) > in_seq_data.size())
- uLength = in_seq_data.size() - uBeginIdx;
- // If uBeginIdx == 0 and uLength == in_seq_data.size()
- // just return as the entire sequence is being requested
- if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
- return uLength;
- // Get two iterators on in_seq, one read and one write
- string::iterator i_read;
- string::iterator i_write;
- // Determine begin and end of read
- i_read = in_seq_data.begin() + uBeginIdx;
- string::iterator i_read_end = i_read + uLength;
- // Loop through in_seq for uLength bases
- // and shift uBeginIdx to beginning
- for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
- (*i_write) = (*i_read);
- ++i_read;
- }
- // Resize in_seq_data to uLength
- in_seq_data.resize(uLength);
- return uLength;
- }
- // Methods to complement na sequences
- // In place methods
- TSeqPos CSeqportUtil_implementation::ComplementIupacna
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Keep just the part of in_seq that will be complemented
- TSeqPos uKept = KeepIupacna(in_seq, uBeginIdx, uLength);
- // Get in_seq data
- string& in_seq_data = in_seq->SetIupacna().Set();
- // Get an iterator to in_seq_data
- string::iterator i_data;
- // Get end of iteration--needed for performance
- string::iterator i_data_end = in_seq_data.end();
- // Loop through the input sequence and complement it
- for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
- (*i_data) =
- m_Iupacna_complement->m_Table[static_cast<unsigned char>(*i_data)];
- return uKept;
- }
- TSeqPos CSeqportUtil_implementation::ComplementNcbi2na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Keep just the part of in_seq that will be complemented
- TSeqPos uKept = KeepNcbi2na(in_seq, uBeginIdx, uLength);
- // Get in_seq data
- vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
- // Get an iterator to in_seq_data
- vector<char>::iterator i_data;
- // Get end of iteration
- vector<char>::iterator i_data_end = in_seq_data.end();
- // Loop through the input sequence and complement it
- for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
- (*i_data) =
- m_Ncbi2naComplement->m_Table[static_cast<unsigned char>(*i_data)];
- return uKept;
- }
- TSeqPos CSeqportUtil_implementation::ComplementNcbi4na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Keep just the part of in_seq that will be complemented
- TSeqPos uKept = KeepNcbi4na(in_seq, uBeginIdx, uLength);
- // Get in_seq data
- vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
- // Get an iterator to in_seq_data
- vector<char>::iterator i_data;
- // Get end of iteration--done for performance
- vector<char>::iterator i_data_end = in_seq_data.end();
- // Loop through the input sequence and complement it
- for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
- (*i_data) =
- m_Ncbi4naComplement->m_Table[static_cast<unsigned char>(*i_data)];
- return uKept;
- }
- // Complement in copy methods
- TSeqPos CSeqportUtil_implementation::ComplementIupacna
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- TSeqPos uKept = GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = 0;
- ComplementIupacna(out_seq, uIdx1, uIdx2);
- return uKept;
- }
- TSeqPos CSeqportUtil_implementation::ComplementNcbi2na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- TSeqPos uKept = GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = 0;
- ComplementNcbi2na(out_seq, uIdx1, uIdx2);
- return uKept;
- }
- TSeqPos CSeqportUtil_implementation::ComplementNcbi4na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- TSeqPos uKept = GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = 0;
- ComplementNcbi4na(out_seq, uIdx1, uIdx2);
- return uKept;
- }
- // Methods to reverse na sequences
- // In place methods
- TSeqPos CSeqportUtil_implementation::ReverseIupacna
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Keep just the part of in_seq that will be reversed
- TSeqPos uKept = KeepIupacna(in_seq, uBeginIdx, uLength);
- // Get in_seq data
- string& in_seq_data = in_seq->SetIupacna().Set();
- // Reverse the order of the string
- reverse(in_seq_data.begin(), in_seq_data.end());
- return uKept;
- }
- TSeqPos CSeqportUtil_implementation::ReverseNcbi2na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq data
- vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
- // Validate and adjust uBeginIdx and uLength
- if(uBeginIdx >= 4*in_seq_data.size())
- {
- in_seq_data.erase(in_seq_data.begin(), in_seq_data.end());
- return 0;
- }
- // If uLength is zero, set to end of sequence
- if(uLength == 0)
- uLength = 4*in_seq_data.size() - uBeginIdx;
- // Ensure that uLength not beyond end of sequence
- if((uBeginIdx + uLength) > (4 * in_seq_data.size()))
- uLength = 4*in_seq_data.size() - uBeginIdx;
- // Determine start and end bytes
- TSeqPos uStart = uBeginIdx/4;
- TSeqPos uEnd = uStart + (uLength - 1 +(uBeginIdx % 4))/4 + 1;
- // Declare an iterator and get end of sequence
- vector<char>::iterator i_in;
- vector<char>::iterator i_in_begin = in_seq_data.begin() + uStart;
- vector<char>::iterator i_in_end = in_seq_data.begin() + uEnd;
- // Loop through in_seq_data and reverse residues in each byte
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*i_in) = m_Ncbi2naRev->m_Table[static_cast<unsigned char>(*i_in)];
- // Reverse the bytes in the sequence
- reverse(i_in_begin, i_in_end);
- // Keep just the requested part of the sequence
- TSeqPos uJagged = 3 - ((uBeginIdx + uLength - 1) % 4) + 4*uStart;
- return KeepNcbi2na(in_seq, uJagged, uLength);
- }
- TSeqPos CSeqportUtil_implementation::ReverseNcbi4na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- // Get a reference to in_seq data
- vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
- // Validate and adjust uBeginIdx and uLength
- if(uBeginIdx >= 2*in_seq_data.size())
- {
- in_seq_data.erase(in_seq_data.begin(), in_seq_data.end());
- return 0;
- }
- // If uLength is zero, set to end of sequence
- if(uLength == 0)
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // Ensure that uLength not beyond end of sequence
- if((uBeginIdx + uLength) > (2 * in_seq_data.size()))
- uLength = 2*in_seq_data.size() - uBeginIdx;
- // Determine start and end bytes
- TSeqPos uStart = uBeginIdx/2;
- TSeqPos uEnd = uStart + (uLength - 1 +(uBeginIdx % 2))/2 + 1;
- // Declare an iterator and get end of sequence
- vector<char>::iterator i_in;
- vector<char>::iterator i_in_begin = in_seq_data.begin() + uStart;
- vector<char>::iterator i_in_end = in_seq_data.begin() + uEnd;
- // Loop through in_seq_data and reverse residues in each byte
- for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
- (*i_in) = m_Ncbi4naRev->m_Table[static_cast<unsigned char>(*i_in)];
- // Reverse the bytes in the sequence
- reverse(i_in_begin, i_in_end);
- // Keep just the requested part of the sequence
- TSeqPos uJagged = 1 - ((uBeginIdx + uLength - 1) % 2) + 2*uStart;
- return KeepNcbi4na(in_seq, uJagged, uLength);
- }
- // Reverse in copy methods
- TSeqPos CSeqportUtil_implementation::ReverseIupacna
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = uLength;
- return ReverseIupacna(out_seq, uIdx1, uIdx2);
- }
- TSeqPos CSeqportUtil_implementation::ReverseNcbi2na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = uLength;
- return ReverseNcbi2na(out_seq, uIdx1, uIdx2);
- }
- TSeqPos CSeqportUtil_implementation::ReverseNcbi4na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx1 = 0, uIdx2 = uLength;
- return ReverseNcbi4na(out_seq, uIdx1, uIdx2);
- }
- // Methods to reverse-complement an na sequences
- // In place methods
- TSeqPos CSeqportUtil_implementation::ReverseComplementIupacna
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseIupacna(in_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementIupacna(in_seq, uIdx, uLength);
- }
- TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi2na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseNcbi2na(in_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementNcbi2na(in_seq, uIdx, uLength);
- }
- TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi4na
- (CSeq_data* in_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseNcbi4na(in_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementNcbi4na(in_seq, uIdx, uLength);
- }
- // Reverse in copy methods
- TSeqPos CSeqportUtil_implementation::ReverseComplementIupacna
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseIupacna(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementIupacna(out_seq, uIdx, uLength);
- }
- TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi2na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseNcbi2na(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementNcbi2na(out_seq, uIdx, uLength);
- }
- TSeqPos CSeqportUtil_implementation::ReverseComplementNcbi4na
- (const CSeq_data& in_seq,
- CSeq_data* out_seq,
- TSeqPos uBeginIdx,
- TSeqPos uLength)
- const
- {
- ReverseNcbi4na(in_seq, out_seq, uBeginIdx, uLength);
- TSeqPos uIdx = 0;
- return ComplementNcbi4na(out_seq, uIdx, uLength);
- }
- // Append methods
- TSeqPos CSeqportUtil_implementation::AppendIupacna
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const string& in_seq1_data = in_seq1.GetIupacna().Get();
- const string& in_seq2_data = in_seq2.GetIupacna().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacna().Set();
- // Validate and Adjust uBeginIdx_ and uLength_
- if((uBeginIdx1 >= in_seq1_data.size()) &&
- (uBeginIdx2 >= in_seq2_data.size()))
- return 0;
- if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
- uLength1 = in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
- uLength2 = in_seq2_data.size() - uBeginIdx2;
- // Append the strings
- out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
- out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
- return uLength1 + uLength2;
- }
- TSeqPos CSeqportUtil_implementation::AppendNcbi2na
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const vector<char>& in_seq1_data = in_seq1.GetNcbi2na().Get();
- const vector<char>& in_seq2_data = in_seq2.GetNcbi2na().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
- // Handle case where both uBeginidx go beyond in_seq
- if((uBeginIdx1 >= 4*in_seq1_data.size()) &&
- (uBeginIdx2 >= 4*in_seq2_data.size()))
- return 0;
- // Handle case where uBeginIdx1 goes beyond end of in_seq1
- if(uBeginIdx1 >= 4*in_seq1_data.size())
- return GetNcbi2naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);
- // Handle case where uBeginIdx2 goes beyond end of in_seq2
- if(uBeginIdx2 >= 4*in_seq2_data.size())
- return GetNcbi2naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);
- // Validate and Adjust uBeginIdx_ and uLength_
- if(((uBeginIdx1 + uLength1) > 4*in_seq1_data.size()) || uLength1 == 0)
- uLength1 = 4*in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > 4*in_seq2_data.size()) || uLength2 == 0)
- uLength2 = 4*in_seq2_data.size() - uBeginIdx2;
- // Resize out_seq_data to hold appended sequence
- TSeqPos uTotalLength = uLength1 + uLength2;
- if((uTotalLength % 4) == 0)
- out_seq_data.resize(uTotalLength/4);
- else
- out_seq_data.resize(uTotalLength/4 + 1);
- // Calculate bit shifts required for in_seq1
- unsigned int lShift1 = 2*(uBeginIdx1 % 4);
- unsigned int rShift1 = 8 - lShift1;
- // Calculate bit shifts required for in_seq2
- unsigned int lShift2, rShift2, uCase;
- unsigned int uVacantIdx = 2*(uLength1 % 4);
- unsigned int uStartIdx = 2*(uBeginIdx2 % 4);
- if((uVacantIdx < uStartIdx) && (uVacantIdx > 0))
- {
- uCase = 0;
- lShift2 = uStartIdx - uVacantIdx;
- rShift2 = 8 - lShift2;
- }
- else if((uVacantIdx < uStartIdx) && (uVacantIdx == 0))
- {
- uCase = 1;
- lShift2 = uStartIdx;
- rShift2 = 8 - lShift2;
- }
- else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
- {
- uCase = 2;
- lShift2 = 0;
- rShift2 = 8;
- }
- else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
- {
- uCase = 3;
- lShift2 = 0;
- rShift2 = 8;
- }
- else
- {
- uCase = 4;
- rShift2 = uVacantIdx - uStartIdx;
- lShift2 = 8 - rShift2;
- }
- // Determine begin and end points for iterators.
- TSeqPos uStart1 = uBeginIdx1/4;
- TSeqPos uEnd1;
- if(((uBeginIdx1 + uLength1) % 4) == 0)
- uEnd1 = (uBeginIdx1 + uLength1)/4;
- else
- uEnd1 = (uBeginIdx1 + uLength1)/4 + 1;
- TSeqPos uStart2 = uBeginIdx2/4;
- TSeqPos uEnd2;
- if(((uBeginIdx2 + uLength2) % 4) == 0)
- uEnd2 = (uBeginIdx2 + uLength2)/4;
- else
- uEnd2 = (uBeginIdx2 + uLength2)/4 + 1;
- // Get begin and end positions on in_seqs
- vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
- vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
- vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
- vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;
- // Declare iterators
- vector<char>::iterator i_out = out_seq_data.begin() - 1;
- vector<char>::const_iterator i_in1;
- vector<char>::const_iterator i_in2;
- // Insert in_seq1 into out_seq
- for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
- (*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);
- // Handle last byte for in_seq1 if necessary
- TSeqPos uEndOutByte;
- if((uLength1 % 4) == 0)
- uEndOutByte = uLength1/4 - 1;
- else
- uEndOutByte = uLength1/4;
- if(i_out != (out_seq_data.begin() + uEndOutByte))
- (*(++i_out)) = (*i_in1) << lShift1;
- // Connect in_seq1 and in_seq2
- unsigned char uMask1 = 255 << (8 - 2*(uLength1 % 4));
- unsigned char uMask2 = 255 >> (2*(uBeginIdx2 % 4));
- TSeqPos uSeq2Inc = 1;
- switch (uCase) {
- case 0: // 0 < uVacantIdx < uStartIdx
- if((i_in2_begin + 1) == i_in2_end)
- {
- (*i_out) &= uMask1;
- (*i_out) |= ((*i_in2_begin) & uMask2) << lShift2;
- return uTotalLength;
- }
- else
- {
- (*i_out) &= uMask1;
- (*i_out) |=
- (((*i_in2_begin) & uMask2) << lShift2) |
- (((*(i_in2_begin+1)) & 255) >> rShift2);
- }
- break;
- case 1: // 0 == uVacantIdx < uStartIdx
- if((i_in2_begin + 1) == i_in2_end)
- {
- (*(++i_out)) = (*i_in2_begin) << lShift2;
- return uTotalLength;
- }
- else
- {
- (*(++i_out)) =
- ((*i_in2_begin) << lShift2) |
- (((*(i_in2_begin+1)) & 255) >> rShift2);
- }
- break;
- case 2: // uVacantIdx == uStartIdx > 0
- (*i_out) &= uMask1;
- (*i_out) |= (*i_in2_begin) & uMask2;
- if((i_in2_begin + 1) == i_in2_end)
- return uTotalLength;
- break;
- case 3: // uVacantIdx == uStartIdx == 0
- (*(++i_out)) = (*i_in2_begin);
- if((i_in2_begin + 1) == i_in2_end)
- return uTotalLength;
- break;
- case 4: // uVacantIdx > uStartIdx
- if((i_in2_begin + 1) == i_in2_end)
- {
- (*i_out) &= uMask1;
- (*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
- if(++i_out != out_seq_data.end())
- (*i_out) = (*i_in2_begin) << lShift2;
- return uTotalLength;
- }
- else
- {
- (*i_out) &= uMask1;
- (*i_out) |=
- (((*i_in2_begin) & uMask2) >> rShift2) |
- ((*(i_in2_begin+1) & ~uMask2) << lShift2);
- uSeq2Inc = 0;
- }
- }
- // Insert in_seq2 into out_seq
- for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
- ((i_in2+1) != i_in2_end); ++i_in2) {
- (*(++i_out)) = ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
- }
- // Handle last byte for in_seq2, if there is one
- if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
- (*i_out) = (*i_in2) << lShift2;
- return uLength1 + uLength2;
- }
- TSeqPos CSeqportUtil_implementation::AppendNcbi4na
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const vector<char>& in_seq1_data = in_seq1.GetNcbi4na().Get();
- const vector<char>& in_seq2_data = in_seq2.GetNcbi4na().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
- // Handle both uBeginidx go beyond end of in_seq
- if((uBeginIdx1 >= 4*in_seq1_data.size()) &&
- (uBeginIdx2 >= 4*in_seq2_data.size()))
- return 0;
- // Handle case where uBeginIdx1 goes beyond end of in_seq1
- if(uBeginIdx1 >= 4*in_seq1_data.size())
- return GetNcbi4naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);
- // Handle case where uBeginIdx2 goes beyond end of in_seq2
- if(uBeginIdx2 >= 4*in_seq2_data.size())
- return GetNcbi4naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);
- // Validate and Adjust uBeginIdx_ and uLength_
- if(((uBeginIdx1 + uLength1) > 2*in_seq1_data.size()) || uLength1 == 0)
- uLength1 = 2*in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > 2*in_seq2_data.size()) || uLength2 == 0)
- uLength2 = 2*in_seq2_data.size() - uBeginIdx2;
- // Resize out_seq_data to hold appended sequence
- TSeqPos uTotalLength = uLength1 + uLength2;
- if((uTotalLength % 2) == 0)
- out_seq_data.resize(uTotalLength/2);
- else
- out_seq_data.resize(uTotalLength/2 + 1);
- // Calculate bit shifts required for in_seq1
- unsigned int lShift1 = 4*(uBeginIdx1 % 2);
- unsigned int rShift1 = 8 - lShift1;
- // Calculate bit shifts required for in_seq2
- unsigned int lShift2, rShift2, uCase;
- unsigned int uVacantIdx = 4*(uLength1 % 2);
- unsigned int uStartIdx = 4*(uBeginIdx2 % 2);
- if((uVacantIdx < uStartIdx))
- {
- uCase = 1;
- lShift2 = uStartIdx;
- rShift2 = 8 - lShift2;
- }
- else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
- {
- uCase = 2;
- lShift2 = 0;
- rShift2 = 8;
- }
- else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
- {
- uCase = 3;
- lShift2 = 0;
- rShift2 = 8;
- }
- else
- {
- uCase = 4;
- rShift2 = uVacantIdx - uStartIdx;
- lShift2 = 8 - rShift2;
- }
- // Determine begin and end points for iterators.
- TSeqPos uStart1 = uBeginIdx1/2;
- TSeqPos uEnd1;
- if(((uBeginIdx1 + uLength1) % 2) == 0)
- uEnd1 = (uBeginIdx1 + uLength1)/2;
- else
- uEnd1 = (uBeginIdx1 + uLength1)/2 + 1;
- TSeqPos uStart2 = uBeginIdx2/2;
- TSeqPos uEnd2;
- if(((uBeginIdx2 + uLength2) % 2) == 0)
- uEnd2 = (uBeginIdx2 + uLength2)/2;
- else
- uEnd2 = (uBeginIdx2 + uLength2)/2 + 1;
- // Get begin and end positions on in_seqs
- vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
- vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
- vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
- vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;
- // Declare iterators
- vector<char>::iterator i_out = out_seq_data.begin() - 1;
- vector<char>::const_iterator i_in1;
- vector<char>::const_iterator i_in2;
- // Insert in_seq1 into out_seq
- for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
- (*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);
- // Handle last byte for in_seq1 if necessary
- TSeqPos uEndOutByte;
- if((uLength1 % 2) == 0)
- uEndOutByte = uLength1/2 - 1;
- else
- uEndOutByte = uLength1/2;
- if(i_out != (out_seq_data.begin() + uEndOutByte))
- (*(++i_out)) = (*i_in1) << lShift1;
- // Connect in_seq1 and in_seq2
- unsigned char uMask1 = 255 << (8 - 4*(uLength1 % 2));
- unsigned char uMask2 = 255 >> (4*(uBeginIdx2 % 2));
- TSeqPos uSeq2Inc = 1;
- switch (uCase) {
- case 1: // 0 == uVacantIdx < uStartIdx
- if((i_in2_begin+1) == i_in2_end)
- {
- (*(++i_out)) = (*i_in2_begin) << lShift2;
- return uTotalLength;
- }
- else
- {
- (*(++i_out)) =
- ((*i_in2_begin) << lShift2) |
- (((*(i_in2_begin+1)) & 255) >> rShift2);
- }
- break;
- case 2: // uVacantIdx == uStartIdx > 0
- (*i_out) &= uMask1;
- (*i_out) |= (*i_in2_begin) & uMask2;
- if((i_in2_begin+1) == i_in2_end)
- return uTotalLength;
- break;
- case 3: // uVacantIdx == uStartIdx == 0
- (*(++i_out)) = (*i_in2_begin);
- if((i_in2_begin+1) == i_in2_end)
- return uTotalLength;
- break;
- case 4: // uVacantIdx > uStartIdx
- if((i_in2_begin+1) == i_in2_end)
- {
- (*i_out) &= uMask1;
- (*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
- if(++i_out != out_seq_data.end())
- (*i_out) = (*i_in2_begin) << lShift2;
- return uTotalLength;
- }
- else
- {
- (*i_out) &= uMask1;
- (*i_out) |=
- (((*i_in2_begin) & uMask2) >> rShift2) |
- ((*(i_in2_begin+1) & ~uMask2) << lShift2);
- uSeq2Inc = 0;
- }
- }
- // Insert in_seq2 into out_seq
- for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
- ((i_in2+1) != i_in2_end); ++i_in2) {
- (*(++i_out)) =
- ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
- }
- // Handle last byte for in_seq2, if there is one
- if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
- (*i_out) = (*i_in2) << lShift2;
- return uTotalLength;
- }
- TSeqPos CSeqportUtil_implementation::AppendNcbieaa
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const string& in_seq1_data = in_seq1.GetNcbieaa().Get();
- const string& in_seq2_data = in_seq2.GetNcbieaa().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- string& out_seq_data = out_seq->SetNcbieaa().Set();
- // Validate and Adjust uBeginIdx_ and uLength_
- if((uBeginIdx1 >= in_seq1_data.size()) &&
- (uBeginIdx2 >= in_seq2_data.size()))
- {
- return 0;
- }
- if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
- uLength1 = in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
- uLength2 = in_seq2_data.size() - uBeginIdx2;
- // Append the strings
- out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
- out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
- return uLength1 + uLength2;
- }
- TSeqPos CSeqportUtil_implementation::AppendNcbistdaa
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const vector<char>& in_seq1_data = in_seq1.GetNcbistdaa().Get();
- const vector<char>& in_seq2_data = in_seq2.GetNcbistdaa().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
- // Validate and Adjust uBeginIdx_ and uLength_
- if((uBeginIdx1 >= in_seq1_data.size()) &&
- (uBeginIdx2 >= in_seq2_data.size()))
- return 0;
- if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
- uLength1 = in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
- uLength2 = in_seq2_data.size() - uBeginIdx2;
- // Get begin and end positions on in_seqs
- vector<char>::const_iterator i_in1_begin =
- in_seq1_data.begin() + uBeginIdx1;
- vector<char>::const_iterator i_in1_end = i_in1_begin + uLength1;
- vector<char>::const_iterator i_in2_begin =
- in_seq2_data.begin() + uBeginIdx2;
- vector<char>::const_iterator i_in2_end = i_in2_begin + uLength2;
- // Insert the in_seqs into out_seq
- out_seq_data.insert(out_seq_data.end(), i_in1_begin, i_in1_end);
- out_seq_data.insert(out_seq_data.end(), i_in2_begin, i_in2_end);
- return uLength1 + uLength2;
- }
- TSeqPos CSeqportUtil_implementation::AppendIupacaa
- (CSeq_data* out_seq,
- const CSeq_data& in_seq1,
- TSeqPos uBeginIdx1,
- TSeqPos uLength1,
- const CSeq_data& in_seq2,
- TSeqPos uBeginIdx2,
- TSeqPos uLength2)
- const
- {
- // Get references to in_seqs
- const string& in_seq1_data = in_seq1.GetIupacaa().Get();
- const string& in_seq2_data = in_seq2.GetIupacaa().Get();
- // Get a reference to out_seq
- out_seq->Reset();
- string& out_seq_data = out_seq->SetIupacaa().Set();
- // Validate and Adjust uBeginIdx_ and uLength_
- if((uBeginIdx1 >= in_seq1_data.size()) &&
- (uBeginIdx2 >= in_seq2_data.size()))
- {
- return 0;
- }
- if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
- uLength1 = in_seq1_data.size() - uBeginIdx1;
- if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
- uLength2 = in_seq2_data.size() - uBeginIdx2;
- // Append the strings
- out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
- out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
- return uLength1 + uLength2;
- }
- // Returns the 3 letter Iupacaa3 code for an ncbistdaa index
- const string& CSeqportUtil_implementation::GetIupacaa3
- (TIndex ncbistdaa)
- {
- return GetCodeOrName(eSeq_code_type_iupacaa3, ncbistdaa, true);
- }
- // Returns true if code type is available
- bool CSeqportUtil_implementation::IsCodeAvailable
- (CSeq_data::E_Choice code_type)
- {
- if (code_type == CSeq_data::e_not_set) {
- return false;
- } else {
- return IsCodeAvailable(EChoiceToESeq(code_type));
- }
- }
- // Return true if code type is available
- bool CSeqportUtil_implementation::IsCodeAvailable (ESeq_code_type code_type)
- {
- typedef list<CRef<CSeq_code_table> > Ttables;
-
- // Iterate through Seq-code-set looking for code type
- ITERATE (Ttables, i_ct, m_SeqCodeSet->GetCodes()) {
- if((*i_ct)->GetCode() == code_type) {
- return true;
- }
- }
- return false;
- }
- // Return a pair containing the first index (start-at) and last index
- // for code_type.
- CSeqportUtil::TPair CSeqportUtil_implementation::GetCodeIndexFromTo
- (CSeq_data::E_Choice code_type)
- {
- return GetCodeIndexFromTo(EChoiceToESeq(code_type));
- }
- // Return a pair containing the first index (start-at) and last index
- // for code_type.
- CSeqportUtil::TPair CSeqportUtil_implementation::GetCodeIndexFromTo
- (ESeq_code_type code_type)
- {
- typedef list<CRef<CSeq_code_table> > Ttables;
-
- // Iterate through Seq-code-set looking for code type
- TPair p;
- ITERATE (Ttables, i_ct, m_SeqCodeSet->GetCodes()) {
- if((*i_ct)->GetCode() == code_type) {
- if ( (*i_ct)->IsSetStart_at() ) {
- p.first = static_cast<TIndex>((*i_ct)->GetStart_at());
- } else {
- p.first = 0;
- }
- p.second = p.first + static_cast<TIndex>((*i_ct)->GetNum() - 1);
- return p;
- }
- }
- throw CSeqportUtil::CBadType("GetCodeIndexFromTo");
- }
- // Converts CSeq_data::E_Choice type to ESeq_code_type
- // and calls overloaded GetCodeOrName()
- const string& CSeqportUtil_implementation::GetCodeOrName
- (CSeq_data::E_Choice code_type,
- TIndex idx,
- bool get_code)
- {
- return GetCodeOrName(EChoiceToESeq(code_type), idx, get_code);
- }
- // Returns the code (symbol) of type code_type for index idx.
- const string& CSeqportUtil_implementation::GetCodeOrName
- (ESeq_code_type code_type,
- TIndex idx,
- bool get_code)
- {
- typedef list<CRef<CSeq_code_table> > Ttables;
- typedef list<CRef<CSeq_code_table::C_E> > Tcodes;
- if ( !m_IndexString[get_code][code_type-1].size() ) {
- throw CSeqportUtil::CBadType("GetCodeOrName");
- }
- idx -= m_StartAt[code_type-1];
- if (idx >= m_IndexString[get_code][code_type-1].size()) {
- throw CSeqportUtil::CBadIndex(idx, "GetCodeOrName");
- }
- return m_IndexString[get_code][code_type-1][idx];
-
- }
- // Converts CSeq_data::E_Choice type to ESeq_code_type and call
- // overloaded GetIndex();
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndex
- (CSeq_data::E_Choice code_type,
- const string& code)
- {
- return GetIndex(EChoiceToESeq(code_type), code);
- }
- // Get the index for code of type code_type. If not found, return -1
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndex
- (ESeq_code_type code_type,
- const string& code)
- {
- typedef list<CRef<CSeq_code_table> > Ttables;
- typedef list<CRef<CSeq_code_table::C_E> > Tcodes;
-
- // Iterator to a map mapping a string code to a code index
- map<string, TIndex>::const_iterator pos;
-
- if ( !m_StringIndex[code_type-1].size() ) {
- throw CSeqportUtil::CBadType("GetIndex");
- }
- pos = m_StringIndex[code_type-1].find(code);
- if (pos != m_StringIndex[code_type-1].end()) {
- return pos->second;
- } else {
- throw CSeqportUtil::CBadSymbol(code, "GetIndex");
- }
-
- }
- // Gets complement of index for code type. Returns -1 if code
- // type does not exist
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndexComplement
- (CSeq_data::E_Choice code_type,
- TIndex idx)
- {
- return GetIndexComplement(EChoiceToESeq(code_type), idx);
- }
- // Returns the complement of the index for code_type. If code_type
- // does not exist, or complements for code_type do not exist,
- // returns -1
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetIndexComplement
- (ESeq_code_type code_type,
- TIndex idx)
- {
-
- // Check that code is available
- if (!m_IndexComplement[code_type-1].size()) {
- throw CSeqportUtil::CBadType("GetIndexComplement");
- }
-
- // Check that idx is in range of code indices
- idx -= m_StartAt[code_type-1];
- if ( idx >= m_IndexComplement[code_type-1].size() ) {
- throw CSeqportUtil::CBadIndex(idx, "GetIndexComplement");
- }
-
- // Return the index of the complement
- return m_IndexComplement[code_type-1][idx];
- }
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetMapToIndex
- (CSeq_data::E_Choice from_type,
- CSeq_data::E_Choice to_type,
- TIndex from_idx)
- {
- return GetMapToIndex(EChoiceToESeq(from_type),
- EChoiceToESeq(to_type),
- from_idx);
- }
- CSeqportUtil::TIndex CSeqportUtil_implementation::GetMapToIndex
- (ESeq_code_type from_type,
- ESeq_code_type to_type,
- TIndex from_idx)
- {
- CMap_table* Map = 0;
-
- if (from_type == eSeq_code_type_iupacna) {
- if (to_type == eSeq_code_type_ncbi2na) {
- Map = m_IupacnaNcbi2na.GetPointer();
- } else if (to_type == eSeq_code_type_ncbi4na) {
- Map = m_IupacnaNcbi4na.GetPointer();
- }
- } else if (from_type == eSeq_code_type_ncbi4na) {
- if (to_type == eSeq_code_type_iupacna) {
- Map = m_Ncbi4naIupacna.GetPointer();
- } else if (to_type == eSeq_code_type_ncbi2na) {
- Map = m_Ncbi4naNcbi2na.GetPointer();
- }
- } else if (from_type == eSeq_code_type_ncbi2na) {
- if (to_type == eSeq_code_type_iupacna) {
- Map = m_Ncbi2naIupacna.GetPointer();
- } else if (to_type == eSeq_code_type_ncbi4na) {
- Map = m_Ncbi2naNcbi4na.GetPointer();
- }
- } else if (from_type == eSeq_code_type_iupacaa) {
- if (to_type == eSeq_code_type_ncbieaa) {
- Map = m_IupacaaNcbieaa.GetPointer();
- } else if (to_type == eSeq_code_type_ncbistdaa) {
- Map = m_IupacaaNcbistdaa.GetPointer();
- }
- } else if (from_type == eSeq_code_type_ncbieaa) {
- if (to_type == eSeq_code_type_iupacaa) {
- Map = m_NcbieaaIupacaa.GetPointer();
- } else if (to_type == eSeq_code_type_ncbistdaa) {
- Map = m_NcbieaaNcbistdaa.GetPointer();
- }
- } else if (from_type == eSeq_code_type_ncbistdaa) {
- if (to_type == eSeq_code_type_iupacaa) {
- Map = m_NcbistdaaIupacaa.GetPointer();
- } else if (to_type == eSeq_code_type_ncbieaa) {
- Map = m_NcbistdaaNcbieaa.GetPointer();
- }
- }
-
- // Check that requested map is available
- if (!Map) {
- throw CSeqportUtil::CBadType("GetMapToIndex");
- }
-
- // Check that from_idx is within range of from_type
- if (from_idx - (*Map).m_StartAt >= (TIndex)(*Map).m_Size) {
- throw CSeqportUtil::CBadIndex(from_idx - (*Map).m_StartAt,
- "GetMapToIndex");
- }
-
- // Return map value
- return (*Map).m_Table[from_idx];
-
- }
- void CSeqportUtil_implementation::x_GetSeqFromSeqData
- (const CSeq_data& data,
- const string** str,
- const vector<char>** vec)
- const
- {
- *str = 0;
- *vec = 0;
- switch ( data.Which() ) {
- case CSeq_data::e_Iupacna:
- *str = &(data.GetIupacna().Get());
- break;
- case CSeq_data::e_Ncbi2na:
- *vec = &(data.GetNcbi2na().Get());
- break;
- case CSeq_data::e_Ncbi4na:
- *vec = &(data.GetNcbi4na().Get());
- break;
- case CSeq_data::e_Ncbi8na:
- *vec = &(data.GetNcbi8na().Get());
- break;
- case CSeq_data::e_Iupacaa:
- *str = &(data.GetIupacaa().Get());
- break;
- case CSeq_data::e_Ncbi8aa:
- *vec = &(data.GetNcbi8aa().Get());
- break;
- case CSeq_data::e_Ncbieaa:
- *str = &(data.GetNcbieaa().Get());
- break;
- case CSeq_data::e_Ncbistdaa:
- *vec = &(data.GetNcbistdaa().Get());
- break;
- } // end of switch statemen
- }
- // same as above, but takes a non-const CSeq_data object.
- void CSeqportUtil_implementation::x_GetSeqFromSeqData
- (CSeq_data& data,
- string** str,
- vector<char>** vec)
- const
- {
- *str = 0;
- *vec = 0;
- switch ( data.Which() ) {
- case CSeq_data::e_Iupacna:
- *str = &(data.SetIupacna().Set());
- break;
- case CSeq_data::e_Ncbi2na:
- *vec = &(data.SetNcbi2na().Set());
- break;
- case CSeq_data::e_Ncbi4na:
- *vec = &(data.SetNcbi4na().Set());
- break;
- case CSeq_data::e_Ncbi8na:
- *vec = &(data.SetNcbi8na().Set());
- break;
- case CSeq_data::e_Iupacaa:
- *str = &(data.SetIupacaa().Set());
- break;
- case CSeq_data::e_Ncbi8aa:
- *vec = &(data.SetNcbi8aa().Set());
- break;
- case CSeq_data::e_Ncbieaa:
- *str = &(data.SetNcbieaa().Set());
- break;
- case CSeq_data::e_Ncbistdaa:
- *vec = &(data.SetNcbistdaa().Set());
- break;
- } // end of switch statemen
- }
- /////////////////////////////////////////////////////////////////////////////
- // CSeqportUtil_implementation::sm_StrAsnData -- a very long and ugly string
- //
- // Local copy of the seqcode.prt ASN.1 text (sequence alphabets and conversion
- // tables), stored as an array of string fragments terminated by a 0 entry.
- const char* CSeqportUtil_implementation::sm_StrAsnData[] =
- {
- "-- This is the set of NCBI sequence code tablesn",
- "-- J.Ostell 10/18/91n",
- "--n",
- "n",
- "Seq-code-set ::= {n",
- " codes { -- codesn",
- " { -- IUPACnan",
- " code iupacna ,n",
- " num 25 , -- continuous 65-89n",
- " one-letter TRUE , -- all one letter codesn",
- " start-at 65 , -- starts with A, ASCII 65n",
- " table {n",
- " { symbol "A", name "Adenine" },n",
- " { symbol "B" , name "G or T or C" },n",
- " { symbol "C", name "Cytosine" },n",
- " { symbol "D", name "G or A or T" },n",
- " { symbol "", name "" },n",
- " { symbol "", name "" },n",
- " { symbol "G", name "Guanine" },n",
- " { symbol "H", name "A or C or T" } ,n",
- " { symbol "", name "" },n",
- " { symbol "", name "" },n",
- " { symbol "K", name "G or T" },n",
- " { symbol "", name ""},n",
- " { symbol "M", name "A or C" },n",
- " { symbol "N", name "A or G or C or T" } ,n",
- " { symbol "", name "" },n",
- " { symbol "", name "" },n",
- " { symbol "", name ""},n",
- " { symbol "R", name "G or A"},n",
- " { symbol "S", name "G or C"},n",
- " { symbol "T", name "Thymine"},n",
- " { symbol "", name ""},n",
- " { symbol "V", name "G or C or A"},n",
- " { symbol "W", name "A or T" },n",
- " { symbol "", name ""},n",
- " { symbol "Y", name "T or C"}n",
- " } , -- end of tablen",
- " comps { -- complementsn",
- " 84,n",
- " 86,n",
- " 71,n",
- " 72,n",
- " 69,n",
- " 70,n",
- " 67,n",
- " 68,n",
- " 73,n",
- " 74,n",
- " 77,n",
- " 76,n",
- " 75,n",
- " 78,n",
- " 79,n",
- " 80,n",
- " 81,n",
- " 89,n",
- " 83,n",
- " 65,n",
- " 85,n",
- " 66,n",
- " 87,n",
- " 88,n",
- " 82n",
- " }n",
- " },n",
- " { -- IUPACaan",
- " code iupacaa ,n",
- " num 26 , -- continuous 65-90n",
- " one-letter TRUE , -- all one letter codesn",
- " start-at 65 , -- starts with A, ASCII 65n",
- " table {n",
- " { symbol "A", name "Alanine" },n",
- " { symbol "B" , name "Asp or Asn" },n",
- " { symbol "C", name "Cysteine" },n",
- " { symbol "D", name "Aspartic Acid" },n",
- " { symbol "E", name "Glutamic Acid" },n",
- " { symbol "F", name "Phenylalanine" },n",
- " { symbol "G", name "Glycine" },n",
- " { symbol "H", name "Histidine" } ,n",
- " { symbol "I", name "Isoleucine" },n",
- " { symbol "", name "" },n",
- " { symbol "K", name "Lysine" },n",
- " { symbol "L", name "Leucine" },n",
- " { symbol "M", name "Methionine" },n",
- " { symbol "N", name "Asparagine" } ,n",
- " { symbol "", name "" },n",
- " { symbol "P", name "Proline" },n",
- " { symbol "Q", name "Glutamine"},n",
- " { symbol "R", name "Arginine"},n",
- " { symbol "S", name "Serine"},n",
- " { symbol "T", name "Threonine"},n",
- " { symbol "U", name "Selenocysteine"}, -- was emptyn"
- " { symbol "V", name "Valine"},n",
- " { symbol "W", name "Tryptophan" },n",
- " { symbol "X", name "Undetermined or atypical"},n",
- " { symbol "Y", name "Tyrosine"},n",
- " { symbol "Z", name "Glu or Gln" }n",
- " } -- end of table n",
- " },n",
- " { -- IUPACeaan",
- " code ncbieaa ,n",
- " num 49 , -- continuous 42-90n",
- " one-letter TRUE , -- all one letter codesn",
- " start-at 42 , -- starts with *, ASCII 42n",
- " table {n",
- " { symbol "*", name "Termination" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "-", name "Gap" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "", name "" } ,n",
- " { symbol "A", name "Alanine" },n",
- " { symbol "B" , name "Asp or Asn" },n",
- " { symbol "C", name "Cysteine" },n",
- " { symbol "D", name "Aspartic Acid" },n",
- " { symbol "E", name "Glutamic Acid" },n",
- " { symbol "F", name "Phenylalanine" },n",
- " { symbol "G", name "Glycine" },n",
- " { symbol "H", name "Histidine" } ,n",
- " { symbol "I", name "Isoleucine" },n",
- " { symbol "", name "" },n",
- " { symbol "K", name "Lysine" },n",
- " { symbol "L", name "Leucine" },n",
- " { symbol "M", name "Methionine" },n",
- " { symbol "N", name "Asparagine" } ,n",
- " { symbol "", name "" },n",
- " { symbol "P", name "Proline" },n",
- " { symbol "Q", name "Glutamine"},n",
- " { symbol "R", name "Arginine"},n",
- " { symbol "S", name "Serine"},n",
- " { symbol "T", name "Threonine"},n",
- " { symbol "U", name "Selenocysteine"},n",
- " { symbol "V", name "Valine"},n",
- " { symbol "W", name "Tryptophan" },n",
- " { symbol "X", name "Undetermined or atypical"},n",
- " { symbol "Y", name "Tyrosine"},n",
- " { symbol "Z", name "Glu or Gln" }n",
- " } -- end of tablen",
- " },n",
- " { -- IUPACaa3n",
- " code iupacaa3 ,n",
- " num 26 , -- continuous 0-25n",
- " one-letter FALSE , -- all 3 letter codesn",
- " table {n",
- " { symbol "---", name "Gap" } ,n",
- " { symbol "Ala", name "Alanine" },n",
- " { symbol "Asx" , name "Asp or Asn" },n",
- " { symbol "Cys", name "Cysteine" },n",
- " { symbol "Asp", name "Aspartic Acid" },n",
- " { symbol "Glu", name "Glutamic Acid" },n",
- " { symbol "Phe", name "Phenylalanine" },n",
- " { symbol "Gly", name "Glycine" },n",
- " { symbol "His", name "Histidine" } ,n",
- " { symbol "Ile", name "Isoleucine" },n",
- " { symbol "Lys", name "Lysine" },n",
- " { symbol "Leu", name "Leucine" },n",
- " { symbol "Met", name "Methionine" },n",
- " { symbol "Asn", name "Asparagine" } ,n",
- " { symbol "Pro", name "Proline" },n",
- " { symbol "Gln", name "Glutamine"},n",
- " { symbol "Arg", name "Arginine"},n",
- " { symbol "Ser", name "Serine"},n",
- " { symbol "Thr", name "Threonine"},n",
- " { symbol "Val", name "Valine"},n",
- " { symbol "Trp", name "Tryptophan" },n",
- " { symbol "Xxx", name "Undetermined or atypical"},n",
- " { symbol "Tyr", name "Tyrosine"},n",
- " { symbol "Glx", name "Glu or Gln" },n",
- " { symbol "Sec", name "Selenocysteine"},n",
- " { symbol "Ter", name "Termination" } n",
- " } -- end of tablen",
- " },n",
- " { -- NCBIstdaan",
- " code ncbistdaa ,n",
- " num 26 , -- continuous 0-25n",
- " one-letter TRUE , -- all one letter codesn",
- " table {n",
- " { symbol "-", name "Gap" } , -- 0n",
- " { symbol "A", name "Alanine" }, -- 1n",
- " { symbol "B" , name "Asp or Asn" }, -- 2n",
- " { symbol "C", name "Cysteine" }, -- 3n",
- " { symbol "D", name "Aspartic Acid" }, -- 4n",
- " { symbol "E", name "Glutamic Acid" }, -- 5n",
- " { symbol "F", name "Phenylalanine" }, -- 6n",
- " { symbol "G", name "Glycine" }, -- 7n",
- " { symbol "H", name "Histidine" } , -- 8n",
- " { symbol "I", name "Isoleucine" }, -- 9n",
- " { symbol "K", name "Lysine" }, -- 10n",
- " { symbol "L", name "Leucine" }, -- 11n",
- " { symbol "M", name "Methionine" }, -- 12n",
- " { symbol "N", name "Asparagine" } , -- 13n",
- " { symbol "P", name "Proline" }, -- 14n",
- " { symbol "Q", name "Glutamine"}, -- 15n",
- " { symbol "R", name "Arginine"}, -- 16n",
- " { symbol "S", name "Serine"}, -- 17n",
- " { symbol "T", name "Threoine"}, -- 18n",
- " { symbol "V", name "Valine"}, -- 19n",
- " { symbol "W", name "Tryptophan" }, -- 20n",
- " { symbol "X", name "Undetermined or atypical"}, -- 21n",
- " { symbol "Y", name "Tyrosine"}, -- 22n",
- " { symbol "Z", name "Glu or Gln" }, -- 23n",
- " { symbol "U", name "Selenocysteine"}, -- 24 n",
- " { symbol "*", name "Termination" } -- 25n",
- " } -- end of table n",
- " },n",
- " { -- NCBI2nan",
- " code ncbi2na ,n",
- " num 4 , -- continuous 0-3n",
- " one-letter TRUE , -- all one letter codesn",
- " table {n",
- " { symbol "A", name "Adenine" },n",
- " { symbol "C", name "Cytosine" },n",
- " { symbol "G", name "Guanine" },n",
- " { symbol "T", name "Thymine/Uracil"}n",
- " } , -- end of tablen",
- " comps { -- complementsn",
- " 3,n",
- " 2,n",
- " 1,n",
- " 0n",
- " }n",
- " },n",
- " { -- NCBI4nan",
- " code ncbi4na ,n",
- " num 16 , -- continuous 0-15n",
- " one-letter TRUE , -- all one letter codesn",
- " table {n",
- " { symbol "-", name "Gap" } ,n",
- " { symbol "A", name "Adenine" },n",
- " { symbol "C", name "Cytosine" },n",
- " { symbol "M", name "A or C" },n",
- " { symbol "G", name "Guanine" },n",
- " { symbol "R", name "G or A"},n",
- " { symbol "S", name "G or C"},n",
- " { symbol "V", name "G or C or A"},n",
- " { symbol "T", name "Thymine/Uracil"},n",
- " { symbol "W", name "A or T" },n",
- " { symbol "Y", name "T or C"} ,n",
- " { symbol "H", name "A or C or T" } ,n",
- " { symbol "K", name "G or T" },n",
- " { symbol "D", name "G or A or T" },n",
- " { symbol "B" , name "G or T or C" },n",
- " { symbol "N", name "A or G or C or T" }n",
- " } , -- end of tablen",
- " comps { -- complementsn",
- " 0 ,n",
- " 8 ,n",
- " 4 ,n",
- " 12,n",
- " 2 ,n",
- " 10,n",
- " 6 ,n",
- " 14,n",
- " 1 ,n",
- " 9 ,n",
- " 5 ,n",
- " 13,n",
- " 3 ,n",
- " 11,n",
- " 7 ,n",
- " 15n",
- " }n",
- " } n",
- " }, -- end of codesn",
- " maps {n",
- " {n",
- " from iupacna ,n",
- " to ncbi2na ,n",
- " num 25 ,n",
- " start-at 65 ,n",
- " table {n",
- " 0, -- A -> An",
- " 1, -- B -> Cn",
- " 1, -- C -> Cn",
- " 2, -- D -> Gn",
- " 255,n",
- " 255,n",
- " 2, -- G -> Gn",
- " 0, -- H -> An",
- " 255,n",
- " 255,n",
- " 2, -- K -> Gn",
- " 255,n",
- " 1, -- M -> Cn",
- " 0, -- N -> An",
- " 255,n",
- " 255,n",
- " 255,n",
- " 2, -- R -> Gn",
- " 1, -- S -> Cn",
- " 3, -- T -> Tn",
- " 255,n",
- " 0, -- V -> An",
- " 3, -- W -> Tn",
- " 255,n",
- " 3 } -- Y -> Tn",
- " }, n",
- " {n",
- " from iupacna ,n",
- " to ncbi4na ,n",
- " num 25 ,n",
- " start-at 65 ,n",
- " table {n",
- " 1, -- An",
- " 14, -- Bn",
- " 2, -- Cn",
- " 13, -- Dn",
- " 255,n",
- " 255,n",
- " 4, -- Gn",
- " 11, -- Hn",
- " 255,n",
- " 255,n",
- " 12, -- Kn",
- " 255,n",
- " 3, -- Mn",
- " 15, -- Nn",
- " 255,n",
- " 255,n",
- " 255,n",
- " 5, -- Rn",
- " 6, -- Sn",
- " 8, -- Tn",
- " 255,n",
- " 7, -- Vn",
- " 9, -- Wn",
- " 255,n",
- " 10 } -- Yn",
- " }, n",
- " {n",
- " from ncbi2na ,n",
- " to iupacna ,n",
- " num 4 ,n",
- " table {n",
- " 65, -- An",
- " 67, -- Cn",
- " 71, -- Gn",
- " 84 } -- Tn",
- " } ,n",
- " {n",
- " from ncbi2na ,n",
- " to ncbi4na ,n",
- " num 4 ,n",
- " table {n",
- " 1, -- An",
- " 2, -- Cn",
- " 4, -- Gn",
- " 8 } -- Tn",
- " } , n",
- " {n",
- " from ncbi4na ,n",
- " to iupacna ,n",
- " num 16 ,n",
- " table {n",
- " 78, -- gap -> Nn",
- " 65, -- An",
- " 67, -- Cn",
- " 77, -- Mn",
- " 71, -- Gn",
- " 82, -- Rn",
- " 83, -- Sn",
- " 86, -- Vn",
- " 84, -- Tn",
- " 87, -- Wn",
- " 89, -- Yn",
- " 72, -- Hn",
- " 75, -- Kn",
- " 68, -- Dn",
- " 66, -- Bn",
- " 78 } -- Nn",
- " } ,n",
- " {n",
- " from ncbi4na ,n",
- " to ncbi2na ,n",
- " num 16 ,n",
- " table {n",
- " 3, -- gap -> Tn",
- " 0, -- A -> An",
- " 1, -- C -> Cn",
- " 1, -- M -> Cn",
- " 2, -- G -> Gn",
- " 2, -- R -> Gn",
- " 1, -- S -> Cn",
- " 0, -- V -> An",
- " 3, -- T -> Tn",
- " 3, -- W -> Tn",
- " 3, -- Y -> Tn",
- " 0, -- H -> An",
- " 2, -- K -> Gn",
- " 2, -- D -> Gn",
- " 1, -- B -> Cn",
- " 0 } -- N -> An",
- " } ,n",
- " {n",
- " from iupacaa ,n",
- " to ncbieaa ,n",
- " num 26 ,n",
- " start-at 65 ,n",
- " table {n",
- " 65 , -- they map directlyn",
- " 66 ,n",
- " 67 ,n",
- " 68,n",
- " 69,n",
- " 70,n",
- " 71,n",
- " 72,n",
- " 73,n",
- " 255, -- Jn",
- " 75,n",
- " 76,n",
- " 77,n",
- " 78,n",
- " 255, -- On",
- " 80,n",
- " 81,n",
- " 82,n",
- " 83,n",
- " 84,n",
- " 85, -- U - was 255n",
- " 86,n",
- " 87,n",
- " 88,n",
- " 89,n",
- " 90 }n",
- " } ,n",
- " {n",
- " from ncbieaa ,n",
- " to iupacaa ,n",
- " num 49 ,n",
- " start-at 42 ,n",
- " table {n",
- " 88 , -- termination -> Xn",
- " 255,n",
- " 255,n",
- " 88, -- Gap -> Xn",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 65 , -- from here they map directlyn",
- " 66 ,n",
- " 67 ,n",
- " 68,n",
- " 69,n",
- " 70,n",
- " 71,n",
- " 72,n",
- " 73,n",
- " 255, -- Jn",
- " 75,n",
- " 76,n",
- " 77,n",
- " 78,n",
- " 255, -- On",
- " 80,n",
- " 81,n",
- " 82,n",
- " 83,n",
- " 84,n",
- " 85, -- U was -> Cn",
- " 86,n",
- " 87,n",
- " 88,n",
- " 89,n",
- " 90 }n",
- " } ,n",
- " {n",
- " from iupacaa ,n",
- " to ncbistdaa ,n",
- " num 26 ,n",
- " start-at 65 ,n",
- " table {n",
- " 1 , -- they map directlyn",
- " 2 ,n",
- " 3 ,n",
- " 4,n",
- " 5,n",
- " 6,n",
- " 7,n",
- " 8,n",
- " 9,n",
- " 255, -- Jn",
- " 10,n",
- " 11,n",
- " 12,n",
- " 13,n",
- " 255, -- On",
- " 14,n",
- " 15,n",
- " 16,n",
- " 17,n",
- " 18,n",
- " 24, -- U - was 255n",
- " 19,n",
- " 20,n",
- " 21,n",
- " 22,n",
- " 23 }n",
- " } ,n",
- " {n",
- " from ncbieaa ,n",
- " to ncbistdaa ,n",
- " num 49 ,n",
- " start-at 42 ,n",
- " table {n",
- " 25, -- terminationn",
- " 255,n",
- " 255,n",
- " 0, -- Gapn",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 255,n",
- " 1 , -- they map directlyn",
- " 2 ,n",
- " 3 ,n",
- " 4,n",
- " 5,n",
- " 6,n",
- " 7,n",
- " 8,n",
- " 9,n",
- " 255, -- Jn",
- " 10,n",
- " 11,n",
- " 12,n",
- " 13,n",
- " 255, -- On",
- " 14,n",
- " 15,n",
- " 16,n",
- " 17,n",
- " 18,n",
- " 24, -- Un",
- " 19,n",
- " 20,n",
- " 21,n",
- " 22,n",
- " 23 }n",
- " } ,n",
- " {n",
- " from ncbistdaa ,n",
- " to ncbieaa ,n",
- " num 26 ,n",
- " table {n",
- " 45 , -- "-"n",
- " 65 , -- they map directly with holes for O and Jn",
- " 66 ,n",
- " 67 ,n",
- " 68,n",
- " 69,n",
- " 70,n",
- " 71,n",
- " 72,n",
- " 73,n",
- " 75,n",
- " 76,n",
- " 77,n",
- " 78,n",
- " 80,n",
- " 81,n",
- " 82,n",
- " 83,n",
- " 84,n",
- " 86,n",
- " 87,n",
- " 88,n",
- " 89,n",
- " 90,n",
- " 85, -- Un",
- " 42} -- *n",
- " } ,n",
- " {n",
- " from ncbistdaa ,n",
- " to iupacaa ,n",
- " num 26 ,n",
- " table {n",
- " 255 , -- "-"n",
- " 65 , -- they map directly with holes for O and Jn",
- " 66 ,n",
- " 67 ,n",
- " 68,n",
- " 69,n",
- " 70,n",
- " 71,n",
- " 72,n",
- " 73,n",
- " 75,n",
- " 76,n",
- " 77,n",
- " 78,n",
- " 80,n",
- " 81,n",
- " 82,n",
- " 83,n",
- " 84,n",
- " 86,n",
- " 87,n",
- " 88,n",
- " 89,n",
- " 90,n",
- " 85, -- U - was 88n",
- " 255} -- *n",
- " } n",
- " n",
- " } -- end of mapsn",
- "-- end of seq-code-set -- }", // make sure '}' is last symbol of ASN text
- 0 // to indicate that there is no more data
- };
- END_objects_SCOPE
- END_NCBI_SCOPE
- /*
- * ---------------------------------------------------------------------------
- * $Log: seqport_util.cpp,v $
- * Revision 1000.4 2004/06/01 19:33:29 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.24
- *
- * Revision 6.24 2004/05/19 17:25:14 gorelenk
- * Added include of PCH - ncbi_pch.hpp
- *
- * Revision 6.23 2004/03/30 21:25:09 shomrat
- * Do not attempt to pack protein sequences
- *
- * Revision 6.22 2004/01/22 19:13:26 shomrat
- * fixed bug in complement tables
- *
- * Revision 6.21 2003/11/28 19:01:54 vasilche
- * Avoid calling CStreamUtils::Pushback() when constructing objects from text ASN.
- * Fixed warnings about conversion char -> unsigned char.
- *
- * Revision 6.20 2003/11/21 14:45:03 grichenk
- * Replaced runtime_error with CException
- *
- * Revision 6.19 2003/11/06 22:15:58 shomrat
- * fixed behavior for default length value
- *
- * Revision 6.18 2003/11/06 16:12:32 shomrat
- * changed seqport_util to use sequtil
- *
- * Revision 6.17 2003/06/04 17:03:11 rsmith
- * Move static mutex out of function to work around CW complex initialization bug.
- *
- * Revision 6.16 2003/03/11 15:53:25 kuznets
- * iterate -> ITERATE
- *
- * Revision 6.15 2003/01/30 22:50:30 kans
- * U (selenocysteine) is now valid in IUPAC alphabet
- *
- * Revision 6.14 2002/09/19 20:05:44 vasilche
- * Safe initialization of static mutexes
- *
- * Revision 6.13 2002/09/13 18:34:40 dicuccio
- * Fixed problem with static object instantiation and type information.
- * Broke the Seq-code-set ASN.1 blob into more easily editable lines (kans).
- *
- * Revision 6.12 2002/05/15 17:57:03 ucko
- * Make the recently introduced tables STL vectors rather than primitive
- * arrays to work around a GCC 3.0.4 optimizer bug.
- *
- * Revision 6.11 2002/05/14 15:15:16 clausen
- * Added IsCodeAvailable, GetCodeIndexFromTo, GetName, GetIndexComplement, GetMapToIndex
- *
- * Revision 6.10 2002/05/03 21:28:14 ucko
- * Introduce T(Signed)SeqPos.
- *
- * Revision 6.9 2002/04/25 19:37:03 clausen
- * Fixed bug in MapNcbi2naToNcbi4na that caused corruption of out_seq
- *
- * Revision 6.8 2002/03/27 19:53:18 grichenk
- * Fixed CR/LF problem in the source
- *
- * Revision 6.7 2002/01/12 07:40:22 vakatov
- * Fixed multiple dangerous typos ('&' instead of '&&' in IFs)
- *
- * Revision 6.6 2002/01/10 19:21:34 clausen
- * Added GetIupacaa3, GetCode, and GetIndex
- *
- * Revision 6.5 2001/10/17 18:35:33 clausen
- * Fixed machine dependencies in InitFastNcbi4naIupacna and InitFastNcbi2naNcbi4na
- *
- * Revision 6.4 2001/10/17 13:04:30 clausen
- * Fixed InitFastNcbi2naIupacna to remove hardware dependency
- *
- * Revision 6.3 2001/09/07 14:16:50 ucko
- * Cleaned up external interface.
- *
- * Revision 6.2 2001/09/06 20:43:32 ucko
- * Fix iterator types (caught by gcc 3.0.1).
- *
- * Revision 6.1 2001/08/24 00:34:23 vakatov
- * Initial revision
- *
- * ===========================================================================
- */