regexpr2.cpp
上传用户:dzyhzl
上传日期:2019-04-29
资源大小:56270k
文件大小:210k
- return m_pgroup->match_group_base<CI>::iterative_rematch_this_( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return m_pgroup->match_group_base<CI>::iterative_rematch_this_c( param );
- }
- virtual width_type width_this( width_param<CI> & )
- {
- return zero_width;
- }
- };
- // Behaves like a lookahead assertion if m_cgroup is size_t( -1 ), or like
- // an independent group otherwise. Either way the group is tried once as a unit: backtracking into it only undoes its effects (see _iterative_rematch_this).
- template< typename CI >
- class independent_group_base : public match_group_base<CI>
- {
- independent_group_base & operator=( independent_group_base const & ); // private, no definition visible here; presumably disables assignment
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive path: try the group at icur, then the rest of the pattern
- {
- backref_tag<CI> * prgbr = NULL;
- // Copy onto the stack the part of the backref vector that could
- // be modified by the lookahead.
- if( m_extent.second )
- {
- prgbr = static_cast<backref_tag<CI>*>( alloca( m_extent.second * sizeof( backref_tag<CI> ) ) ); // alloca: storage belongs to this call's stack frame, so there is nothing to free
- std::copy( param.prgbackrefs->begin() + m_extent.first,
- param.prgbackrefs->begin() + m_extent.first + m_extent.second,
- std::raw_storage_iterator<backref_tag<CI>*, backref_tag<CI> >( prgbr ) ); // constructs the copies in the raw buffer. NOTE(review): std::raw_storage_iterator is deprecated in C++17 and removed in C++20
- }
- // Match until the end of this group and then return
- // BUGBUG can the compiler optimize this?
- bool const fdomatch = CSTRINGS::value ?
- match_group_base<CI>::recursive_match_all_c( param, icur ) :
- match_group_base<CI>::recursive_match_all_( param, icur ); // qualified calls: run the base-class group matcher, not the overrides declared below
- if( m_fexpected == fdomatch ) // the group produced the outcome this node requires
- {
- // If m_cgroup != -1, then this is not a zero-width assertion.
- if( fdomatch && size_t( -1 ) != m_cgroup )
- icur = ( *param.prgbackrefs )[ m_cgroup ].second; // continue from the recorded end of this group's match
- if( recursive_match_next_( param, icur, CSTRINGS() ) )
- return true;
- }
- // if match_group::recursive_match_all_ returned true, the backrefs must be restored
- if( m_extent.second && fdomatch )
- std::copy( prgbr, prgbr + m_extent.second, param.prgbackrefs->begin() + m_extent.first );
- return false;
- }
- template< typename CSTRINGS >
- bool _iterative_match_this( match_param<CI> & param, CSTRINGS ) const // iterative path: run the whole group as one nested match, then report the outcome
- {
- group_wrapper<CI> expr( this );
- _push_frame( param ); // saved here; popped below on failure or later by _iterative_rematch_this
- CI istart = param.icur;
- bool const fdomatch = matcher_helper<CI>::_Do_match_iterative( &expr, param, param.icur, CSTRINGS() );
- if( m_fexpected == fdomatch )
- {
- // If m_cgroup == -1, then this is a zero-width assertion.
- if( fdomatch && size_t( -1 ) == m_cgroup )
- param.icur = istart; // an assertion consumes no input
- param.next = next();
- return true;
- }
- _pop_frame( param );
- return false;
- }
- bool _iterative_rematch_this( match_param<CI> & param ) const // backtracking into this node: undo the saved state and fail, no alternative is retried
- {
- _pop_frame( param );
- return false;
- }
- public:
- independent_group_base( size_t cgroup, regex_arena & arena ) // numbered-group form: m_fexpected is fixed to true and the extent starts out empty
- : match_group_base<CI>( cgroup, arena ),
- m_fexpected( true ), m_extent( 0, 0 )
- {
- }
- virtual void set_extent( extent const & ex ) // sets the ( first index, count ) range of backrefs that is saved and restored around this group
- {
- m_extent = ex;
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, false_t() );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, true_t() );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- protected:
- void _push_frame( match_param<CI> & param ) const // pushes the backrefs of the extent in ascending order, then param.icur
- {
- unsafe_stack * pstack = param.pstack;
- typedef typename match_param<CI>::backref_vector::const_iterator VCI;
- VCI istart = param.prgbackrefs->begin() + m_extent.first;
- VCI iend = istart + m_extent.second;
- for( ; iend != istart; ++istart )
- {
- pstack->push( *istart );
- }
- pstack->push( param.icur );
- }
- void _pop_frame( match_param<CI> & param ) const // exact mirror of _push_frame: pops icur first, then the backrefs in descending order
- {
- unsafe_stack * pstack = param.pstack;
- typedef typename match_param<CI>::backref_vector::iterator VI;
- VI istart = param.prgbackrefs->begin() + m_extent.first;
- VI iend = istart + m_extent.second;
- pstack->pop( param.icur );
- while( iend != istart )
- {
- pstack->pop( *--iend );
- }
- }
- independent_group_base( bool const fexpected, regex_arena & arena ) // assertion form: group number is size_t( -1 ); m_extent is not listed, so it is default-initialized
- : match_group_base<CI>( size_t( -1 ), arena ), m_fexpected( fexpected )
- {
- }
- bool const m_fexpected; // outcome the group match must have for this node to succeed (false = negated)
- extent m_extent; // ( first index, count ) of the backrefs this group may modify
- };
- template< typename CI >
- class independent_group : public independent_group_base<CI>
- {
- independent_group & operator=( independent_group const & );
- public:
- independent_group( size_t cgroup, regex_arena & arena )
- : independent_group_base<CI>( cgroup, arena ), m_end_group( this )
- {
- }
- virtual ~independent_group()
- {
- _cleanup();
- }
- virtual sub_expr<CI> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
- {
- if( greedy )
- return new( arena ) max_group_quantifier<CI>( this, lbound, ubound );
- else
- return new( arena ) min_group_quantifier<CI>( this, lbound, ubound );
- }
- protected:
- independent_group( bool const fexpected, regex_arena & arena )
- : independent_group_base<CI>( fexpected, arena ),
- m_end_group( this )
- {
- }
- bool _call_back( match_param<CI> & param, CI icur ) const
- {
- if( size_t( -1 ) != m_cgroup )
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_cgroup ];
- br.first = br.reserved1;
- br.second = icur;
- br.matched = true;
- }
- return true;
- }
- class end_group : public indestructable_sub_expr<CI, end_group>
- {
- independent_group<CI> const *const m_pgroup;
- end_group & operator=( end_group const & );
- bool _iterative_match_this( match_param<CI> & param ) const
- {
- size_t cgroup = m_pgroup->group_number();
- if( size_t( -1 ) != cgroup )
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ cgroup ];
- br.first = br.reserved1;
- br.second = param.icur;
- br.matched = true;
- }
- param.next = NULL;
- return true;
- }
- public:
- end_group( independent_group<CI> const * pgroup = NULL )
- : m_pgroup( pgroup )
- {
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return m_pgroup->_call_back( param, icur );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return m_pgroup->_call_back( param, icur );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual width_type width_this( width_param<CI> & )
- {
- return zero_width;
- }
- } m_end_group;
- friend class end_group;
- virtual sub_expr<CI> * _get_end_group()
- {
- return & m_end_group;
- }
- };
- template< typename CI >
- class lookahead_assertion : public independent_group<CI>
- {
- lookahead_assertion & operator=( lookahead_assertion const & );
- public:
- lookahead_assertion( bool const fexpected, regex_arena & arena )
- : independent_group<CI>( fexpected, arena )
- {
- }
- virtual sub_expr<CI> * quantify( size_t, size_t, bool, regex_arena & )
- {
- throw bad_regexpr( "look-ahead assertion cannot be quantified" );
- }
- virtual bool is_assertion() const
- {
- return true;
- }
- virtual width_type width_this( width_param<CI> & param )
- {
- // calculate the group's width and store it, but return zero_width
- match_group_base<CI>::width_this( param );
- return zero_width;
- }
- };
- template< typename CI >
- class lookbehind_assertion : public independent_group_base<CI> // zero-width look-behind; m_fexpected (base class) selects the plain or the negated form
- {
- lookbehind_assertion & operator=( lookbehind_assertion const & ); // private, no definition visible here; presumably disables assignment
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive path: test the text before icur, then match the rest of the pattern from icur
- {
- // This is the room in the string from the start to the current position
- size_t room = std::distance( param.ibegin, icur );
- // If we don't have enough room to match the lookbehind, the match fails.
- // If we wanted the match to fail, try to match the rest of the pattern.
- if( m_nwidth.m_min > room )
- return m_fexpected ? false : recursive_match_next_( param, icur, CSTRINGS() );
- backref_tag<CI> * prgbr = NULL;
- // Copy onto the stack the part of the backref vector that could
- // be modified by the lookbehind.
- if( m_extent.second )
- {
- prgbr = static_cast<backref_tag<CI>*>( alloca( m_extent.second * sizeof( backref_tag<CI> ) ) ); // alloca: storage belongs to this call's stack frame, so there is nothing to free
- std::copy( param.prgbackrefs->begin() + m_extent.first,
- param.prgbackrefs->begin() + m_extent.first + m_extent.second,
- std::raw_storage_iterator<backref_tag<CI>*, backref_tag<CI> >( prgbr ) ); // constructs the copies in the raw buffer
- }
- CI local_istart = icur;
- std::advance( local_istart, -int( (std::min)( m_nwidth.m_max, room ) ) ); // earliest candidate start: m_max positions back, clamped to the available room
- CI local_istop = icur;
- std::advance( local_istop, -int( m_nwidth.m_min - 1 ) ); // icur - m_min + 1, i.e. one past the latest candidate start. NOTE(review): for m_min == 0 this appears to rely on m_min - 1 converting to int -1; confirm the type of m_min
- // Create a local param struct that has icur as param.iend
- match_param<CI> local_param( param.ibegin, param.istart, icur, param.prgbackrefs ); // NOTE(review): the third argument is presumably the stop position that end_group compares against (param.istop); confirm against match_param
- // Find the rightmost match that ends at icur.
- for( CI local_icur = local_istart; local_icur != local_istop; ++local_icur )
- {
- // Match until the end of this group and then return
- // Note that we're calling recursive_match_all_ regardless of the CSTRINGS switch.
- // This is because for the lookbehind assertion, the termination condition is when
- // icur == param.iend, not when *icur == '\0'
- bool const fmatched = match_group_base<CI>::recursive_match_all_( local_param, local_icur );
- // If the match results were what we were expecting, try to match the
- // rest of the pattern. If that succeeds, return true.
- if( m_fexpected == fmatched && recursive_match_next_( param, icur, CSTRINGS() ) )
- return true;
- // if match_group::recursive_match_all_ returned true, the backrefs must be restored
- if( fmatched )
- {
- if( m_extent.second )
- std::copy( prgbr, prgbr + m_extent.second, param.prgbackrefs->begin() + m_extent.first );
- // Match succeeded. If this is a negative lookbehind, we didn't want it
- // to succeed, so return false.
- if( ! m_fexpected )
- return false;
- }
- }
- // No variation of the lookbehind was satisfied in a way that permitted
- // the rest of the pattern to match successfully, so return false.
- return false;
- }
- template< typename CSTRINGS >
- bool _iterative_match_this( match_param<CI> & param, CSTRINGS ) const // iterative path: same test, reported through param.next instead of recursing
- {
- // Save the backrefs
- _push_frame( param );
- // This is the room in the string from the start to the current position
- size_t room = std::distance( param.ibegin, param.icur );
- // If we don't have enough room to match the lookbehind, the match fails.
- // If we wanted the match to fail, try to match the rest of the pattern.
- if( m_nwidth.m_min > room )
- {
- if( m_fexpected )
- {
- _pop_frame( param );
- return false;
- }
- param.next = next();
- return true; // the frame stays pushed; _iterative_rematch_this pops it
- }
- CI local_istart = param.icur;
- std::advance( local_istart, -int( (std::min)( m_nwidth.m_max, room ) ) ); // earliest candidate start: m_max positions back, clamped to the available room
- CI local_istop = param.icur;
- std::advance( local_istop, -int( m_nwidth.m_min - 1 ) ); // icur - m_min + 1, i.e. one past the latest candidate start
- // Create a local param struct that has icur as param.iend
- match_param<CI> local_param( param.ibegin, param.istart, param.icur, param.prgbackrefs );
- local_param.pstack = param.pstack; // the nested match uses the caller's stack
- group_wrapper<CI> expr( this );
- // Find the rightmost match that ends at icur.
- for( CI local_icur = local_istart; local_icur != local_istop; ++local_icur )
- {
- // Match until the end of this group and then return
- // Note that we're calling _Do_match_iterative_helper regardless of the CSTRINGS switch.
- // This is because for the lookbehind assertion, the termination condition is when
- // icur == param.iend, not when *icur == '\0'
- bool const fmatched = matcher_helper<CI>::_Do_match_iterative_helper( &expr, local_param, local_icur );
- // If the match results were what we were expecting, try to match the
- // rest of the pattern. If that succeeds, return true.
- if( m_fexpected == fmatched )
- {
- param.next = next();
- return true; // the frame stays pushed; _iterative_rematch_this pops it
- }
- // if the nested match succeeded, the backrefs must be restored
- if( fmatched ) // NOTE(review): reached only when fmatched != m_fexpected, so m_fexpected is false here, the return below is always taken and the re-push after it is never executed
- {
- // Restore the backrefs
- _pop_frame( param );
- // Match succeeded. If this is a negative lookbehind, we didn't want it
- // to succeed, so return false.
- if( ! m_fexpected )
- return false;
- // Save the backrefs again.
- _push_frame( param );
- }
- }
- // No variation of the lookbehind was satisfied in a way that permitted
- // the rest of the pattern to match successfully, so return false.
- _pop_frame( param );
- return false;
- }
- bool _iterative_rematch_this( match_param<CI> & param ) const // backtracking into this node: pop the frame left by _iterative_match_this and fail
- {
- _pop_frame( param );
- return false;
- }
- public:
- lookbehind_assertion( bool const fexpected, regex_arena & arena )
- : independent_group_base<CI>( fexpected, arena )
- {
- }
- virtual ~lookbehind_assertion()
- {
- _cleanup(); // NOTE(review): presumably has to run before m_end_group is destroyed, as the quantifier destructors in this file state for their end_quant member; confirm
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, false_t() );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, true_t() );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool is_assertion() const
- {
- return true;
- }
- virtual width_type width_this( width_param<CI> & param )
- {
- // calculate the group's width and store it, but return zero_width
- match_group_base<CI>::width_this( param );
- return zero_width;
- }
- protected:
- struct end_group : public indestructable_sub_expr<CI, end_group> // tail node of the look-behind group: succeeds only when the nested match ends exactly at param.istop
- {
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return param.istop == icur;
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return param.istop == icur;
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- param.next = NULL; // no node follows inside the nested match
- return param.istop == param.icur;
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- param.next = NULL; // no node follows inside the nested match
- return param.istop == param.icur;
- }
- virtual width_type width_this( width_param<CI> & )
- {
- return zero_width;
- }
- } m_end_group;
- virtual sub_expr<CI> * _get_end_group()
- {
- return & m_end_group;
- }
- };
- template< typename CI >
- class group_quantifier : public match_quantifier<CI>
- {
- group_quantifier & operator=( group_quantifier const & );
-
- bool _iterative_match_this( match_param<CI> & param ) const
- {
- _push_frame( param );
- param.next = m_psub->next(); // ptr to end_quant
- return true;
- }
- bool _iterative_rematch_this( match_param<CI> & param ) const
- {
- _pop_frame( param );
- return false;
- }
- public:
- group_quantifier( match_group_base<CI> * psub,
- size_t lbound, size_t ubound,
- sub_expr<CI> * pend_quant )
- : match_quantifier<CI>( psub, lbound, ubound ),
- m_group( *psub )
- {
- *psub->pnext() = pend_quant;
- }
- // sub-classes of group_quantifer that own the end_quant
- // object must declare a destructor, and it must call _cleanup
- virtual ~group_quantifier() = 0;
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- protected:
- struct old_quant
- {
- size_t reserved2;
- bool reserved3;
- CI reserved4;
- CI reserved5;
- old_quant()
- {
- }
- old_quant( backref_tag<CI> const & br )
- : reserved2( br.reserved2 ), reserved3( br.reserved3 ),
- reserved4( br.reserved4 ), reserved5( br.reserved5 )
- {
- }
- };
- void _push_frame( match_param<CI> & param ) const
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ group_number() ];
- old_quant old_qt( br );
- param.pstack->push( old_qt );
- br.reserved2 = 0; // nbr of times this group has matched
- br.reserved3 = true; // toggle used for backtracking
- br.reserved4 = static_init<CI>::value;
- br.reserved5 = static_init<CI>::value;
- }
- void _pop_frame( match_param<CI> & param ) const
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ group_number() ];
- old_quant old_qt;
- param.pstack->pop( old_qt );
- br.reserved2 = old_qt.reserved2;
- br.reserved3 = old_qt.reserved3;
- br.reserved4 = old_qt.reserved4;
- br.reserved5 = old_qt.reserved5;
- }
- size_t group_number() const
- {
- return m_group.group_number();
- }
- size_t & cmatches( match_param<CI> & param ) const
- {
- return ( *param.prgbackrefs )[ group_number() ].reserved2;
- }
- CI & highwater1( match_param<CI> & param ) const
- {
- return ( *param.prgbackrefs )[ group_number() ].reserved4;
- }
- CI & highwater2( match_param<CI> & param ) const
- {
- return ( *param.prgbackrefs )[ group_number() ].reserved5;
- }
- match_group_base<CI> const & m_group;
- };
- template< typename CI >
- inline group_quantifier<CI>::~group_quantifier()
- {
- }
- template< typename CI >
- class max_group_quantifier : public group_quantifier<CI> // greedy group quantifier: another repetition of the group is tried before the rest of the pattern
- {
- max_group_quantifier & operator=( max_group_quantifier const & ); // private, no definition visible here; presumably disables assignment
-
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive entry: save this group's loop state, start a fresh loop, restore the state on failure
- {
- CI old_highwater1 = highwater1( param );
- CI old_highwater2 = highwater2( param );
- size_t old_cmatches = cmatches( param );
- highwater1( param ) = static_init<CI>::value;
- highwater2( param ) = icur;
- cmatches( param ) = 0;
- if( _recurse( param, icur, CSTRINGS() ) )
- return true;
- cmatches( param ) = old_cmatches;
- highwater2( param ) = old_highwater2;
- highwater1( param ) = old_highwater1;
- return false;
- }
- public:
- max_group_quantifier( match_group_base<CI> * psub, size_t lbound, size_t ubound )
- : group_quantifier<CI>( psub, lbound, ubound, & m_end_quant ), // only the address of m_end_quant is passed here; the member itself is constructed after the base
- m_end_quant( this )
- {
- }
- virtual ~max_group_quantifier()
- {
- // Must call _cleanup() here before the end_quant object
- // gets destroyed.
- _cleanup();
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- protected:
- template< typename CSTRINGS >
- bool _recurse( match_param<CI> & param, CI icur, CSTRINGS ) const // one step of the greedy loop; also re-entered from end_quantifier after each repetition
- {
- if( m_ubound == cmatches( param ) ) // upper bound reached: only the rest of the pattern may follow
- return recursive_match_next_( param, icur, CSTRINGS() );
- ++cmatches( param );
- if( m_psub->recursive_match_all_( param, icur, CSTRINGS() ) ) // greedy: try one more repetition first
- return true;
- if( --cmatches( param ) < m_lbound ) // that repetition failed and without it the minimum is not met
- return false;
- return recursive_match_next_( param, icur, CSTRINGS() );
- }
- class end_quantifier : public indestructable_sub_expr<CI, end_quantifier> // node installed after the group body (group_quantifier's constructor stores it via *psub->pnext()); decides whether to repeat
- {
- max_group_quantifier<CI> const *const m_pquant;
- end_quantifier & operator=( end_quantifier const & );
- void _push_frame( match_param<CI> & param ) const // saves reserved4 on the stack, then shifts: reserved4 <- reserved5, reserved5 <- icur
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- param.pstack->push( br.reserved4 );
- br.reserved4 = br.reserved5;
- br.reserved5 = param.icur;
- }
- void _pop_frame( match_param<CI> & param ) const // inverse of _push_frame: reserved5 <- reserved4, then reserved4 <- popped value
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- br.reserved5 = br.reserved4;
- param.pstack->pop( br.reserved4 );
- }
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive path: reached each time the group body has matched up to icur
- {
- CI old_highwater1 = m_pquant->highwater1( param );
- if( icur == old_highwater1 ) // same position as recorded in highwater1: stop repeating (the iterative path calls this "forcibly break the infinite loop")
- return m_pquant->recursive_match_next_( param, icur, CSTRINGS() );
- m_pquant->highwater1( param ) = m_pquant->highwater2( param );
- m_pquant->highwater2( param ) = icur;
- if( m_pquant->_recurse( param, icur, CSTRINGS() ) )
- return true;
- m_pquant->highwater2( param ) = m_pquant->highwater1( param ); // undo the shift performed above
- m_pquant->highwater1( param ) = old_highwater1;
- return false;
- }
- bool _iterative_match_this( match_param<CI> & param ) const // iterative path: always pushes a frame and returns true; param.next selects repeat vs. continue
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- // forcibly break the infinite loop
- if( param.icur == br.reserved4 )
- {
- _push_frame( param ); // pushed even here so that _iterative_rematch_this has a frame to pop
- param.next = m_pquant->next();
- return true;
- }
- _push_frame( param );
- // If we've matched the max nbr of times, move on to the next
- // sub-expr.
- if( m_pquant->m_ubound == br.reserved2 )
- {
- param.next = m_pquant->next();
- br.reserved3 = false; // on backtracking nothing is left to try for this frame
- return true;
- }
- // Rematch the group.
- br.reserved3 = true; // on backtracking, the rest of the pattern is tried without this repetition
- param.next = m_pquant->m_psub;
- ++br.reserved2;
- return true;
- }
- bool _iterative_rematch_this( match_param<CI> & param ) const // backtracking counterpart of _iterative_match_this
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- // infinite loop forcibly broken
- if( param.icur == param.pstack->top( static_init<CI>::value ) ) // NOTE(review): presumably peeks the value saved by _push_frame without popping it; confirm unsafe_stack::top
- {
- _pop_frame( param );
- return false;
- }
- if( br.reserved3 )
- {
- --br.reserved2; // the extra repetition did not lead to a match: uncount it
- param.next = m_pquant->next();
- if( m_pquant->m_lbound <= br.reserved2 ) // minimum still met: try the rest of the pattern instead
- {
- br.reserved3 = false;
- return true;
- }
- _pop_frame( param );
- return false;
- }
- br.reserved3 = true;
- _pop_frame( param );
- return false;
- }
- public:
- end_quantifier( max_group_quantifier<CI> const * pquant = NULL )
- : m_pquant( pquant )
- {
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual width_type width_this( width_param<CI> & )
- {
- return zero_width;
- }
- } m_end_quant;
- friend class end_quantifier;
- };
- template< typename CI >
- class min_group_quantifier : public group_quantifier<CI> // non-greedy group quantifier: the rest of the pattern is tried before another repetition of the group
- {
- min_group_quantifier & operator=( min_group_quantifier const & ); // private, no definition visible here; presumably disables assignment
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive entry: save this group's loop state, start a fresh loop, restore the state on failure
- {
- CI old_highwater1 = highwater1( param );
- CI old_highwater2 = highwater2( param );
- size_t old_cmatches = cmatches( param );
- highwater1( param ) = static_init<CI>::value;
- highwater2( param ) = icur;
- cmatches( param ) = 0;
- if( _recurse( param, icur, CSTRINGS() ) )
- return true;
- cmatches( param ) = old_cmatches;
- highwater2( param ) = old_highwater2;
- highwater1( param ) = old_highwater1;
- return false;
- }
- public:
- min_group_quantifier( match_group_base<CI> * psub, size_t lbound, size_t ubound )
- : group_quantifier<CI>( psub, lbound, ubound, & m_end_quant ), // only the address of m_end_quant is passed here; the member itself is constructed after the base
- m_end_quant( this )
- {
- }
- virtual ~min_group_quantifier()
- {
- // Must call _cleanup() here before the end_quant object
- // gets destroyed.
- _cleanup();
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- protected:
- template< typename CSTRINGS >
- bool _recurse( match_param<CI> & param, CI icur, CSTRINGS ) const // one step of the non-greedy loop; also re-entered from end_quantifier after each repetition
- {
- if( m_lbound <= cmatches( param ) ) // minimum met: prefer the rest of the pattern
- {
- if( recursive_match_next_( param, icur, CSTRINGS() ) )
- return true;
- }
- if( m_ubound > cmatches( param ) ) // otherwise, or if that failed, try one more repetition while below the maximum
- {
- ++cmatches( param );
- if( m_psub->recursive_match_all_( param, icur, CSTRINGS() ) )
- return true;
- --cmatches( param );
- }
- return false;
- }
- class end_quantifier : public indestructable_sub_expr<CI, end_quantifier> // node installed after the group body (group_quantifier's constructor stores it via *psub->pnext()); decides whether to repeat
- {
- min_group_quantifier<CI> const *const m_pquant;
- end_quantifier & operator=( end_quantifier const & );
- void _push_frame( match_param<CI> & param ) const // saves reserved4 on the stack, then shifts: reserved4 <- reserved5, reserved5 <- icur
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- param.pstack->push( br.reserved4 );
- br.reserved4 = br.reserved5;
- br.reserved5 = param.icur;
- }
- void _pop_frame( match_param<CI> & param ) const // inverse of _push_frame: reserved5 <- reserved4, then reserved4 <- popped value
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- br.reserved5 = br.reserved4;
- param.pstack->pop( br.reserved4 );
- }
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive path: reached each time the group body has matched up to icur
- {
- CI old_highwater1 = m_pquant->highwater1( param );
- if( icur == old_highwater1 ) // same position as recorded in highwater1: stop repeating (the iterative path calls this "forcibly break the infinite loop")
- return m_pquant->recursive_match_next_( param, icur, CSTRINGS() );
- m_pquant->highwater1( param ) = m_pquant->highwater2( param );
- m_pquant->highwater2( param ) = icur;
- if( m_pquant->_recurse( param, icur, CSTRINGS() ) )
- return true;
- m_pquant->highwater2( param ) = m_pquant->highwater1( param ); // undo the shift performed above
- m_pquant->highwater1( param ) = old_highwater1;
- return false;
- }
- bool _iterative_match_this( match_param<CI> & param ) const // iterative path: always pushes a frame and returns true; param.next selects repeat vs. continue
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- // forcibly break the infinite loop
- if( param.icur == br.reserved4 )
- {
- _push_frame( param ); // pushed even here so that _iterative_rematch_this has a frame to pop
- param.next = m_pquant->next();
- return true;
- }
- _push_frame( param );
- if( m_pquant->m_lbound <= br.reserved2 ) // minimum met: try the rest of the pattern before repeating
- {
- br.reserved3 = false; // tells the rematch path that a further repetition has not been tried yet
- param.next = m_pquant->next();
- return true;
- }
- ++br.reserved2; // below the minimum: another repetition is mandatory
- param.next = m_pquant->m_psub;
- return true;
- }
-
- bool _iterative_rematch_this( match_param<CI> & param ) const // backtracking counterpart of _iterative_match_this
- {
- backref_tag<CI> & br = ( *param.prgbackrefs )[ m_pquant->group_number() ];
- // infinite loop forcibly broken
- if( param.icur == param.pstack->top( static_init<CI>::value ) ) // NOTE(review): presumably peeks the value saved by _push_frame without popping it; confirm unsafe_stack::top
- {
- _pop_frame( param );
- return false;
- }
- if( br.reserved3 ) // a repetition was the last thing tried from this frame: uncount it and give up
- {
- --br.reserved2;
- _pop_frame( param );
- return false;
- }
- br.reserved3 = true;
- if( m_pquant->m_ubound > br.reserved2 ) // the rest of the pattern failed: fall back to one more repetition if the maximum allows
- {
- ++br.reserved2;
- param.next = m_pquant->m_psub;
- return true;
- }
- _pop_frame( param );
- return false;
- }
- public:
- end_quantifier( min_group_quantifier<CI> const * pquant = NULL )
- : m_pquant( pquant )
- {
- }
-
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual width_type width_this( width_param<CI> & )
- {
- return zero_width;
- }
- } m_end_quant;
- friend class end_quantifier;
- };
// Adjusts cbackref in place to account for invisible groups: walking
// invisible_groups from the front, cbackref is incremented once for every
// entry that is <= the (already adjusted) cbackref, and the walk stops at the
// first entry that is greater. The early stop means the result depends on the
// order of the list (ascending order is presumably expected -- confirm at the
// call site).
inline void fixup_backref( size_t & cbackref, std::list<size_t> const & invisible_groups )
{
    typedef std::list<size_t>::const_iterator group_iter;

    group_iter const last = invisible_groups.end();
    group_iter pos = invisible_groups.begin();
    while( pos != last && *pos <= cbackref )
    {
        ++cbackref;
        ++pos;
    }
}
- template< typename CI >
- class match_backref : public sub_expr<CI>
- {
- bool _iterative_rematch_this( match_param<CI> & param ) const
- {
- backref_tag<CI> const & br = ( *param.prgbackrefs )[ m_cbackref ];
- ptrdiff_t dist = std::distance( br.first, br.second );
- std::advance( param.icur, ( int ) - dist );
- return false;
- }
- public:
- match_backref( size_t cbackref )
- : m_cbackref( cbackref )
- {
- }
- // Return the width specifications of the group to which this backref refers
- virtual width_type width_this( width_param<CI> & param )
- {
- // fix up the backref to take into account the number of invisible groups
- if( param.first_pass() )
- fixup_backref( m_cbackref, param.invisible_groups );
- if( m_cbackref >= param.rggroups.size() )
- throw bad_regexpr( "reference to nonexistent group" );
- // If the entry in the backref vector has been nulled out, then we are
- // calculating the width for this group.
- if( NULL == param.rggroups[ m_cbackref ] )
- return worst_width; // can't tell how wide this group will be. :-(
- return param.rggroups[ m_cbackref ]->width_this( param );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- protected:
- size_t m_cbackref;
- };
- template< typename CMP, typename CI >
- class match_backref_t : public match_backref<CI>
- {
- public:
- match_backref_t( size_t cbackref )
- : match_backref<CI>( cbackref )
- {
- }
- virtual sub_expr<CI> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
- {
- if( greedy )
- return new( arena ) max_atom_quantifier<CI, match_backref_t<CMP, CI> >( this, lbound, ubound );
- else
- return new( arena ) min_atom_quantifier<CI, match_backref_t<CMP, CI> >( this, lbound, ubound );
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return ( match_backref_t::recursive_match_this_( param, icur ) && recursive_match_next_( param, icur, false_t() ) );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return ( match_backref_t::recursive_match_this_c( param, icur ) && recursive_match_next_( param, icur, true_t() ) );
- }
- virtual bool recursive_match_this_( match_param<CI> & param, CI & icur ) const
- {
- return _do_match_this( param, icur, false_t() );
- }
- virtual bool recursive_match_this_c( match_param<CI> & param, CI & icur ) const
- {
- return _do_match_this( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- param.next = next();
- return _do_match_this( param, param.icur, false_t() );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- param.next = next();
- return _do_match_this( param, param.icur, true_t() );
- }
- protected:
- template< typename CSTRINGS >
- bool _do_match_this( match_param<CI> & param, CI & icur, CSTRINGS ) const
- {
- // Pattern compilation should have failed if the following is false:
- assert( m_cbackref < param.prgbackrefs->size() );
- // Don't match a backref that hasn't match anything
- if( ! ( *param.prgbackrefs )[ m_cbackref ].matched )
- return false;
- CI ithis = ( *param.prgbackrefs )[ m_cbackref ].first;
- CI const istop = ( *param.prgbackrefs )[ m_cbackref ].second;
- CI icur_tmp = icur;
- for( ; istop != ithis; ++icur_tmp, ++ithis )
- {
- if( eos_t<CSTRINGS>::eval( param, icur_tmp ) || CMP::eval( *icur_tmp, *ithis ) )
- return false;
- }
- icur = icur_tmp;
- return true;
- }
- };
- template< typename CI >
- inline match_backref<CI> * create_backref(
- size_t cbackref,
- REGEX_FLAGS flags, regex_arena & arena )
- {
- // The two concrete node types differ only in their comparison policy.
- typedef typename std::iterator_traits<CI>::value_type char_type;
- typedef match_backref_t<ch_neq_t<char_type>, CI> exact_backref;
- typedef match_backref_t<ch_neq_nocase_t<char_type>, CI> nocase_backref;
- // Only the NOCASE bit of the flags matters for this choice.
- switch( NOCASE & flags )
- {
- case NOCASE:
- return new( arena ) nocase_backref( cbackref );
- case 0:
- return new( arena ) exact_backref( cbackref );
- default:
- __assume( 0 ); // tells the compiler that this is unreachable
- }
- }
- template< typename CI > // CI is the iterator type in which match positions (icur, param.icur) are expressed
- class match_recurse : public sub_expr<CI> // pattern node that restarts matching at param.first from the current position
- {
- match_recurse & operator=( match_recurse const & ); // declared private with no body in this class: assignment is not meant to be used
- void _push_frame( match_param<CI> & param ) const // saves every backref's reserved1 field on param.pstack, in vector order
- {
- typedef typename match_param<CI>::backref_vector::const_iterator VCI;
- unsafe_stack * pstack = param.pstack;
- VCI istart = param.prgbackrefs->begin();
- VCI iend = param.prgbackrefs->end();
- for( ; iend != istart; ++istart )
- {
- pstack->push( istart->reserved1 );
- }
- }
- void _pop_frame( match_param<CI> & param ) const // inverse of _push_frame: pops into reserved1 walking the vector back to front
- {
- typedef typename match_param<CI>::backref_vector::iterator VI;
- unsafe_stack * pstack = param.pstack;
- VI istart = param.prgbackrefs->begin();
- VI iend = param.prgbackrefs->end();
- while( iend != istart )
- {
- --iend; // decrement first so the last element is popped first
- pstack->pop( iend->reserved1 );
- }
- }
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const // recursive-mode implementation shared by recursive_match_all_ and recursive_match_all_c
- {
- // Prevent infinite recursion. If icur == ( *param.prgbackrefs )[ 0 ].reserved1,
- // then the pattern has eaten 0 chars to date, and we would recurse forever.
- if( icur == ( *param.prgbackrefs )[ 0 ].reserved1 )
- return recursive_match_next_( param, icur, CSTRINGS() ); // skip the recursion and just continue with the following node
- // copy the backref vector onto the stack
- CI * prgci = static_cast<CI*>( alloca( param.prgbackrefs->size() * sizeof( CI ) ) ); // room for one CI per backref
- save_backrefs<CI>( *param.prgbackrefs, prgci ); // NOTE(review): save_backrefs is defined elsewhere; presumably stashes per-backref state in prgci - confirm
- // Recurse.
- if( param.first->recursive_match_all_( param, icur, CSTRINGS() ) )
- {
- // Restore the backref vector
- restore_backrefs<CI>( *param.prgbackrefs, prgci );
- // Recursive match succeeded. Try to match the rest of the pattern
- // using the end of the recursive match as the start of the next
- return recursive_match_next_( param, ( *param.prgbackrefs )[ 0 ].second, CSTRINGS() );
- }
- // Recursion failed
- return false; // NOTE(review): restore_backrefs is not called on this path - confirm that is intended
- }
- template< typename CSTRINGS >
- bool _iterative_match_this( match_param<CI> & param, CSTRINGS ) const // iterative-mode implementation shared by iterative_match_this_ and iterative_match_this_c
- {
- param.pstack->push( param.icur ); // remembered so that _iterative_rematch_this (or the failure path below) can pop it back
- // Prevent infinite recursion
- if( param.icur == ( *param.prgbackrefs )[ 0 ].reserved1 )
- {
- param.next = next();
- return true; // succeeds without recursing; the pushed icur stays on the stack for the rematch call
- }
- _push_frame( param );
- if( matcher_helper<CI>::_Do_match_iterative( param.first, param, param.icur, CSTRINGS() ) ) // nested run starting at param.first
- {
- _pop_frame( param );
- param.next = next();
- return true;
- }
- _pop_frame( param ); // failure: undo the frame, then put param.icur back
- param.pstack->pop( param.icur );
- return false;
- }
- bool _iterative_rematch_this( match_param<CI> & param ) const // backtracking entry: restores param.icur and reports failure (no alternative is tried)
- {
- param.pstack->pop( param.icur );
- return false;
- }
- public:
- match_recurse()
- {
- }
- virtual sub_expr<CI> * quantify( size_t, size_t, bool, regex_arena & ) // always throws: this node rejects quantifiers
- {
- throw bad_regexpr( "recursion sub-expression cannot be quantified" );
- }
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const // false_t flavour
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const // true_t flavour
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const // false_t flavour
- {
- return _iterative_match_this( param, false_t() );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const // true_t flavour
- {
- return _iterative_match_this( param, true_t() );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param );
- }
- virtual width_type width_this( width_param<CI> & param ) // reports this node's width; the answer differs between the first pass and later passes
- {
- // We need to know how big the whole pattern is before we can say
- // how big a recursive match would be.
- if( param.first_pass() )
- {
- ++param.cookie; // NOTE(review): the consumer of cookie is outside this view; presumably requests another width pass - confirm
- return zero_width;
- }
- width_type this_width = param.total_width; // start from the width recorded for the whole pattern
- this_width.m_max = width_mult( this_width.m_max, size_t( -1 ) ); // could recurse forever
- return this_width;
- }
- };
- template< typename CI >
- inline match_recurse<CI> * create_recurse( regex_arena & arena )
- {
- // The node takes no constructor arguments; it is simply placed in the arena.
- match_recurse<CI> * const precurse = new( arena ) match_recurse<CI>();
- return precurse;
- }
- template< typename CI >
- struct backref_condition
- {
- // Index into the backref vector of the group this condition tests.
- size_t m_cbackref;
- backref_condition( size_t cbackref )
- : m_cbackref( cbackref )
- {
- }
- // Recursive-mode test. True only when the index is inside the backref
- // vector and that entry is flagged as matched.
- template< typename CSTRINGS >
- bool recursive_match_this_( match_param<CI> & param, CI, CSTRINGS ) const
- {
- if( param.prgbackrefs->size() <= m_cbackref )
- return false;
- return ( *param.prgbackrefs )[ m_cbackref ].matched;
- }
- // Iterative-mode test; identical check to the recursive one.
- template< typename CSTRINGS >
- bool iterative_match_this_( match_param<CI> & param, CSTRINGS ) const
- {
- if( param.prgbackrefs->size() <= m_cbackref )
- return false;
- return ( *param.prgbackrefs )[ m_cbackref ].matched;
- }
- // Nothing to undo or retry when backtracking.
- template< typename CSTRINGS >
- bool iterative_rematch_this_( match_param<CI> &, CSTRINGS ) const
- {
- return false;
- }
- void width_this( width_param<CI> & param )
- {
- // Only the first width pass adjusts the index, so that it accounts
- // for the number of invisible groups.
- if( ! param.first_pass() )
- return;
- fixup_backref( m_cbackref, param.invisible_groups );
- }
- };
- template< typename CI >
- struct assertion_condition
- {
- // The assertion group whose outcome decides the conditional. It is held
- // through std::auto_ptr, so copying this object hands the pointer on.
- std::auto_ptr<match_group_base<CI> > m_passert;
- assertion_condition( match_group_base<CI> * passert )
- : m_passert( passert )
- {
- }
- // Recursive evaluation. The trailing tag argument selects the _c
- // (true_t) or the plain (false_t) entry point of the assertion.
- bool recursive_match_this_( match_param<CI> & param, CI icur, true_t ) const
- {
- return ( *m_passert ).recursive_match_all_c( param, icur );
- }
- bool recursive_match_this_( match_param<CI> & param, CI icur, false_t ) const
- {
- return ( *m_passert ).recursive_match_all_( param, icur );
- }
- // Iterative evaluation, forwarded to the assertion in the same way.
- bool iterative_match_this_( match_param<CI> & param, true_t ) const
- {
- return ( *m_passert ).iterative_match_this_c( param );
- }
- bool iterative_match_this_( match_param<CI> & param, false_t ) const
- {
- return ( *m_passert ).iterative_match_this_( param );
- }
- // Backtracking is forwarded to the assertion as well.
- bool iterative_rematch_this_( match_param<CI> & param, true_t ) const
- {
- return ( *m_passert ).iterative_rematch_this_c( param );
- }
- bool iterative_rematch_this_( match_param<CI> & param, false_t ) const
- {
- return ( *m_passert ).iterative_rematch_this_( param );
- }
- // Lets the assertion compute its width; the value itself is discarded.
- void width_this( width_param<CI> & param )
- {
- static_cast<void>( ( *m_passert ).width_this( param ) );
- }
- };
- template< typename CI, typename COND >
- class match_conditional : public match_group<CI>
- {
- protected:
- typedef typename match_group<CI>::alt_list_type alt_list_type;
- private:
- match_conditional & operator=( match_conditional const & );
- // Recursive mode: the first alternate is taken when the condition holds,
- // the second one (if any) when it does not. With no second alternate a
- // failed condition simply continues after the group.
- template< typename CSTRINGS >
- bool _recursive_match_all( match_param<CI> & param, CI icur, CSTRINGS ) const
- {
- typedef typename alt_list_type::const_iterator LCI;
- LCI ialt = m_rgalternates.begin();
- bool const fcond = m_condition.recursive_match_this_( param, icur, CSTRINGS() );
- if( ! fcond )
- {
- ++ialt;
- if( m_rgalternates.end() == ialt )
- return recursive_match_next_( param, icur, CSTRINGS() );
- }
- return (*ialt)->recursive_match_all_( param, icur, CSTRINGS() );
- }
- // Iterative mode: record the outcome of the condition on the stack (it
- // is read back by _iterative_rematch_this) and choose the next node.
- template< typename CSTRINGS >
- bool _iterative_match_this( match_param<CI> & param, CSTRINGS ) const
- {
- typedef typename alt_list_type::const_iterator LCI;
- LCI ialt = m_rgalternates.begin();
- bool const fcond = m_condition.iterative_match_this_( param, CSTRINGS() );
- param.pstack->push( fcond );
- if( fcond )
- {
- param.next = *ialt;
- }
- else
- {
- // Second alternate if present, otherwise whatever follows the group.
- param.next = ( ++ialt != m_rgalternates.end() ) ? *ialt : next();
- }
- return true;
- }
- // Backtracking: pop the recorded outcome; the condition is only asked
- // to rematch if it had succeeded. This node itself never retries.
- template< typename CSTRINGS >
- bool _iterative_rematch_this( match_param<CI> & param, CSTRINGS ) const
- {
- bool fcond;
- param.pstack->pop( fcond );
- if( fcond )
- {
- m_condition.iterative_rematch_this_( param, CSTRINGS() );
- }
- return false;
- }
- public:
- typedef COND condition_type;
- match_conditional( size_t cgroup, condition_type condition, regex_arena & arena )
- : match_group<CI>( cgroup, arena ),
- m_condition( condition )
- {
- }
- // The virtual entry points only pick the tag (false_t for the plain
- // names, true_t for the _c names) and forward to the templates above.
- virtual bool recursive_match_all_( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, false_t() );
- }
- virtual bool recursive_match_all_c( match_param<CI> & param, CI icur ) const
- {
- return _recursive_match_all( param, icur, true_t() );
- }
- virtual bool iterative_match_this_( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, false_t() );
- }
- virtual bool iterative_match_this_c( match_param<CI> & param ) const
- {
- return _iterative_match_this( param, true_t() );
- }
- virtual bool iterative_rematch_this_( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param, false_t() );
- }
- virtual bool iterative_rematch_this_c( match_param<CI> & param ) const
- {
- return _iterative_rematch_this( param, true_t() );
- }
- virtual width_type width_this( width_param<CI> & param )
- {
- typedef typename alt_list_type::const_iterator LCI;
- LCI ialt = m_rgalternates.begin();
- width_type width = ( *ialt )->get_width( param );
- ++ialt;
- if( m_rgalternates.end() == ialt )
- {
- // No second alternate: the group may match nothing at all.
- width.m_min = 0;
- }
- else
- {
- // Two alternates: the width spans both of them.
- width_type const other = ( *ialt )->get_width( param );
- width.m_min = (std::min)( width.m_min, other.m_min );
- width.m_max = (std::max)( width.m_max, other.m_max );
- }
- // Have the condition calculate its width, too. This is important
- // if the condition is a lookbehind assertion.
- m_condition.width_this( param );
- m_nwidth = width;
- return m_nwidth;
- }
- protected:
- condition_type m_condition;
- };
- template< typename CI >
- inline match_conditional<CI, backref_condition<CI> > * create_backref_conditional(
- size_t cgroup,
- size_t cbackref,
- regex_arena & arena )
- {
- // Conditional group whose test is "has backref number cbackref matched?"
- typedef match_conditional<CI, backref_condition<CI> > node_type;
- backref_condition<CI> cond( cbackref );
- node_type * const pnode = new( arena ) node_type( cgroup, cond, arena );
- return pnode;
- }
- template< typename CI >
- inline match_conditional<CI, assertion_condition<CI> > * create_assertion_conditional(
- size_t cgroup,
- match_group_base<CI> * passert,
- regex_arena & arena )
- {
- // Conditional group whose test is the assertion passert. The condition
- // object is passed as a named (non-const) object because it holds an
- // std::auto_ptr, which cannot be copied from a const source.
- typedef match_conditional<CI, assertion_condition<CI> > node_type;
- assertion_condition<CI> cond( passert );
- node_type * const pnode = new( arena ) node_type( cgroup, cond, arena );
- return pnode;
- }
- // REGEX_ALLOCATOR is a #define which determines which allocator
- // gets used as the STL-compliant allocator. When sub_expr objects
- // contain STL containers as members, REGEX_ALLOCATOR is the allocator
- // type used. (See the match_group and charset classes.) If REGEX_ALLOCATOR
- // expands to regex_allocator, then *all* memory used when compiling
- // a pattern ends up in a regex_arena. In that case, destructors do
- // not need to be called; we can just throw the arena away. However, if
- // REGEX_ALLOCATOR expands to anything else (std::allocator, for instance)
- // then some of the memory does not live in a regex_arena, and destructors
- // do need to be called. The following code enforces this logic.
- template< typename AL > // primary template: for an arbitrary allocator type AL, destructor calls are not skipped
- struct skip_dtor_calls
- {
- enum { value = false };
- };
- template<> // specialization for regex_allocator<char>: value is true, which makes ~basic_rpattern_base_impl release m_pfirst before resetting it
- struct skip_dtor_calls< regex_allocator<char> >
- {
- enum { value = true };
- };
- //
- // From basic_rpattern_base_impl
- //
- template< typename CI >
- REGEXPR_H_INLINE basic_rpattern_base_impl<CI>::~basic_rpattern_base_impl()
- {
- // If the configured allocator keeps everything in the arena, give up
- // ownership of the expression tree first, so that (presumably) the
- // reset below has nothing left to destroy.
- bool const fskip_dtors = skip_dtor_calls< REGEX_ALLOCATOR<char> >::value;
- if( fskip_dtors )
- {
- m_pfirst.release();
- }
- sub_expr_base<CI> const * const pnone = 0;
- assign_auto_ptr( m_pfirst, pnone );
- // Finally hand back the arena's memory.
- m_arena.deallocate();
- }
- template< typename CI >
- REGEXPR_H_INLINE bool basic_rpattern_base_impl<CI>::_do_match( match_param<CI> & param, bool use_null ) const
- {
- // Without the GLOBAL flag a single search is all that is needed.
- if( ! ( GLOBAL & flags() ) )
- return matcher_helper<CI>::_Do_match( *this, param, use_null );
- // Global find: search repeatedly, each time resuming at the end of the
- // previous match, and collect the backrefs the flags ask for.
- // The NOBACKREFS flag is ignored in the match method.
- bool const fkeep_all = ( ALLBACKREFS == ( ALLBACKREFS & flags() ) );
- bool const fkeep_first = ( FIRSTBACKREFS == ( FIRSTBACKREFS & flags() ) );
- backref_vector rgcollected;
- while( matcher_helper<CI>::_Do_match( *this, param, use_null ) )
- {
- backref_type const & brwhole = ( *param.prgbackrefs )[0];
- if( fkeep_first )
- {
- // Keep only backref 0 of every match.
- rgcollected.push_back( brwhole );
- }
- else if( fkeep_all )
- {
- // Append every backref of this match.
- rgcollected.insert(
- rgcollected.end(),
- param.prgbackrefs->begin(),
- param.prgbackrefs->end() );
- }
- else
- {
- // Keep only the most recent match's backrefs.
- rgcollected.swap( *param.prgbackrefs );
- }
- // Resume after this match, and note whether it was empty.
- param.istart = brwhole.second;
- param.no0len = ( brwhole.first == brwhole.second );
- }
- // Hand the collected backrefs to the caller's vector.
- param.prgbackrefs->swap( rgcollected );
- return ! param.prgbackrefs->empty();
- }
- template< typename CI >
- REGEXPR_H_INLINE bool basic_rpattern_base_impl<CI>::_do_match( match_param<CI> & param, char_type const * szbegin ) const
- {
- // Unless the search is RIGHTMOST the string is never measured; the
- // matcher is run with use_null set to true.
- if( ! ( RIGHTMOST & flags() ) )
- return basic_rpattern_base_impl<CI>::_do_match( param, true );
- // A RIGHTMOST match needs to know where the string ends: measure
- // szbegin and place istop that many positions past istart.
- param.istop = param.istart;
- std::advance( param.istop, traits_type::length( szbegin ) );
- return basic_rpattern_base_impl<CI>::_do_match( param, false );
- }
- template< typename CI >
- REGEXPR_H_INLINE size_t basic_rpattern_base_impl<CI>::_do_count( match_param<CI> & param, bool use_null ) const
- {
- // Count successive matches; every search resumes where the previous
- // match ended.
- size_t cfound = 0;
- for( ; matcher_helper<CI>::_Do_match( *this, param, use_null ); ++cfound )
- {
- backref_type const & brwhole = ( *param.prgbackrefs )[0];
- param.istart = brwhole.second;
- // Remember whether this match was empty.
- param.no0len = ( brwhole.first == brwhole.second );
- }
- return cfound;
- }
- template< typename CI >
- REGEXPR_H_INLINE size_t basic_rpattern_base_impl<CI>::_do_count( match_param<CI> & param, char_type const * szbegin ) const
- {
- // Unless the count is RIGHTMOST the string is never measured; the
- // matcher is run with use_null set to true.
- if( ! ( RIGHTMOST & flags() ) )
- return basic_rpattern_base_impl<CI>::_do_count( param, true );
- // A RIGHTMOST count needs to know where the string ends: measure
- // szbegin and place istop that many positions past istart.
- param.istop = param.istart;
- std::advance( param.istop, traits_type::length( szbegin ) );
- return basic_rpattern_base_impl<CI>::_do_count( param, false );
- }
- // A scope guard for pattern parsing: unless dismiss() has been called,
- // the destructor deallocates the arena. This frees the arena when parsing
- // the pattern is abandoned because of an exception.
- struct deallocation_helper
- {
- // arena: the arena to deallocate if the guard is destroyed undismissed.
- // The guard only keeps a reference; the arena must outlive it.
- explicit deallocation_helper( regex_arena & arena )
- : m_arena( arena ),
- m_fparse_successful( false )
- {
- }
- ~deallocation_helper()
- {
- if( ! m_fparse_successful )
- m_arena.deallocate();
- }
-
- // Call once parsing has succeeded; the destructor then leaves the arena alone.
- void dismiss()
- {
- m_fparse_successful = true;
- }
- private:
- // Copying is disabled (declared, never defined): a copy would refer to
- // the same arena and its destructor would deallocate it a second time.
- deallocation_helper( deallocation_helper const & );
- deallocation_helper & operator=( deallocation_helper const & );
- regex_arena & m_arena;
- bool m_fparse_successful;
- };
- } // namespace detail
- //
- // Implementation of basic_rpattern_base:
- //
- template< typename CI, typename SY >
- REGEXPR_H_INLINE void basic_rpattern_base<CI, SY>::init( string_type const & pat, REGEX_FLAGS flags, REGEX_MODE mode )
- {
- // Construct the new pattern in a scratch object, then trade states with
- // it. If construction throws, *this has not been modified.
- basic_rpattern_base<CI, SY> fresh( pat, flags, mode );
- swap( fresh );
- }
- template< typename CI, typename SY >
- REGEXPR_H_INLINE void basic_rpattern_base<CI, SY>::init( string_type const & pat, string_type const & subst, REGEX_FLAGS flags, REGEX_MODE mode )
- {
- // Construct pattern and substitution in a scratch object, then trade
- // states with it. If construction throws, *this has not been modified.
- basic_rpattern_base<CI, SY> fresh( pat, subst, flags, mode );
- swap( fresh );
- }
- template< typename CI, typename SY >
- REGEXPR_H_INLINE void basic_rpattern_base<CI, SY>::_common_init( REGEX_FLAGS flags ) // parses *m_pat into m_pfirst, computes widths, and decides whether matching must loop (m_floop)
- {
- m_cgroups = 0;
- std::vector<detail::match_group_base<CI>*> rggroups; // filled by _find_next_group, indexed by group number
- typename string_type::iterator ipat = m_pat->begin();
- iter_wrap iw( ipat ); // parse cursor handed to the _find_* helpers
- syntax_type sy( flags );
- detail::match_group_base<CI> * pgroup;
- // Set up a sentry that will free the arena memory
- // automatically on parse failure.
- {
- detail::deallocation_helper parse_sentry( m_arena );
- // This will throw on failure
- pgroup = _find_next_group( iw, NULL, sy, rggroups ); // NULL: the outermost group has no enclosing group
- // Note that the parse was successful
- parse_sentry.dismiss();
- }
- assert( NULL == m_pfirst.get() );
- detail::assign_auto_ptr( m_pfirst, pgroup );
- // Calculate the width of the pattern and all groups
- m_nwidth = pgroup->group_width( rggroups, m_invisible_groups );
- //
- // determine if we can get away with only calling m_pfirst->recursive_match_all_ only once
- //
- m_floop = true; // default; cleared by either optimization below
- // Optimization: if first character of pattern string is '^'
- // and we are not doing a multiline match, then we only
- // need to try recursive_match_all_ once
- typename string_type::iterator icur = m_pat->begin();
- if( MULTILINE != ( MULTILINE & m_flags ) &&
- 1 == pgroup->calternates() && // only applied when the top-level group has a single alternate
- icur != m_pat->end() &&
- BEGIN_LINE == sy.reg_token( icur, m_pat->end() ) )
- {
- m_flags = ( REGEX_FLAGS ) ( m_flags & ~RIGHTMOST ); // RIGHTMOST is dropped in this case
- m_floop = false;
- }
- // Optimization: if first 2 characters of pattern string are ".*" or ".+",
- // then we only need to try recursive_match_all_ once
- icur = m_pat->begin(); // rewind: the previous test may have advanced icur
- if( RIGHTMOST != ( RIGHTMOST & m_flags ) &&
- SINGLELINE == ( SINGLELINE & m_flags ) &&
- 1 == pgroup->calternates() &&
- icur != m_pat->end() &&
- MATCH_ANY == sy.reg_token( icur, m_pat->end() ) &&
- icur != m_pat->end() ) // something must follow the '.' for a quantifier to be read
- {
- switch( sy.quant_token( icur, m_pat->end() ) )
- {
- case ONE_OR_MORE:
- case ZERO_OR_MORE:
- case ONE_OR_MORE_MIN:
- case ZERO_OR_MORE_MIN:
- m_floop = false; // any other quantifier token leaves m_floop unchanged
- }
- }
- }
- template< typename CI, typename SY >
- REGEXPR_H_INLINE void basic_rpattern_base<CI, SY>::set_substitution( string_type const & subst )
- {
- // Build the complete new substitution state in locals first. The
- // members are only touched by the three swaps at the very end, so a
- // failure while normalizing or parsing leaves *this as it was.
- std::auto_ptr<string_type> pnew_subst( new string_type( subst ) );
- detail::subst_list_type new_subst_list;
- bool fnew_uses_backrefs = false;
- _normalize_string( *pnew_subst );
- basic_rpattern_base<CI, SY>::_parse_subst( *pnew_subst, fnew_uses_backrefs, new_subst_list );
- // Commit.
- detail::swap_auto_ptr( pnew_subst, m_subst );
- std::swap( fnew_uses_backrefs, m_fuses_backrefs );
- new_subst_list.swap( m_subst_list );
- }
- // Parses one group starting at iw.ipat and returns the node built for it.
- //
- // iw               parse cursor into *m_pat; advanced past what is consumed.
- // pgroup_enclosing group that contains this one; a recursion element (?R)
- //                  is added to it directly. Callers in this file pass NULL
- //                  for the outermost group and for a conditional's assertion.
- // sy               syntax object used to tokenize; its flags are restored to
- //                  their entry value once a group has been parsed.
- // rggroups         table of numbered groups, indexed by group number; the
- //                  new group registers itself here.
- //
- // Returns the new group, or NULL when the construct produces no group of
- // its own (empty extension groups such as (?:) or (?i), the recursion
- // element, comments). Throws bad_regexpr on malformed input.
- template< typename CI, typename SY >
- inline detail::match_group_base<CI> * basic_rpattern_base<CI, SY>::_find_next_group(
- iter_wrap & iw,
- detail::match_group_base<CI> * pgroup_enclosing, syntax_type & sy,
- std::vector<detail::match_group_base<CI>*> & rggroups )
- {
- std::auto_ptr<detail::match_group_base<CI> > pgroup;
- typename string_type::iterator itemp = iw.ipat;
- REGEX_FLAGS old_flags = sy.get_flags();
- TOKEN tok;
- size_t extent_start = m_cgroups;
- bool fconditional = false;
- // Look for group extensions.
- if( m_pat->end() != iw.ipat && NO_TOKEN != ( tok = sy.ext_token( iw.ipat, m_pat->end() ) ) )
- {
- // An extension can neither open the pattern nor be the last thing in it.
- if( m_pat->begin() == itemp || m_pat->end() == iw.ipat )
- throw bad_regexpr( "ill-formed regular expression" );
- // Is this a recursion element?
- if( EXT_RECURSE == tok )
- {
- pgroup_enclosing->add_item( detail::create_recurse<CI>( m_arena ) );
- // This pattern could recurse deeply. Note that fact here so that
- // we can opt to use a stack-conservative algorithm at match time.
- m_fok_to_recurse = false;
- }
- // Don't process empty groups like (?:) or (?i) or (?R)
- if( END_GROUP != sy.reg_token( itemp = iw.ipat, m_pat->end() ) )
- {
- switch( tok )
- {
- case EXT_NOBACKREF:
- // note that this group is not visible, so we can fix
- // up offsets into the backref vector later
- m_invisible_groups.push_back( m_cgroups );
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::match_group<CI>( _get_next_group_nbr(), m_arena ) );
- break;
- case EXT_INDEPENDENT:
- m_invisible_groups.push_back( m_cgroups );
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::independent_group<CI>( _get_next_group_nbr(), m_arena ) );
- break;
- case EXT_POS_LOOKAHEAD:
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::lookahead_assertion<CI>( true, m_arena ) );
- break;
- case EXT_NEG_LOOKAHEAD:
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::lookahead_assertion<CI>( false, m_arena ) );
- break;
- case EXT_POS_LOOKBEHIND:
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::lookbehind_assertion<CI>( true, m_arena ) );
- break;
- case EXT_NEG_LOOKBEHIND:
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::lookbehind_assertion<CI>( false, m_arena ) );
- break;
- case EXT_CONDITION:
- {
- // Braces: cbackref is initialized here and the later case labels
- // must not jump over that initialization.
- fconditional = true;
- m_invisible_groups.push_back( m_cgroups );
- // Read the group number on its own. Declaring the variable inside
- // the if-condition would initialize it with the value of the whole
- // "number && END_GROUP" expression (0 or 1), so every conditional
- // would end up testing backref 1.
- size_t const cbackref = detail::parse_int( iw.ipat, m_pat->end() );
- if( 0 != cbackref &&
- END_GROUP == sy.reg_token( iw.ipat, m_pat->end() ) )
- {
- // (?(N)...) : the condition is "has group N matched?"
- detail::assign_auto_ptr(
- pgroup, detail::create_backref_conditional<CI>(
- _get_next_group_nbr(), cbackref, m_arena ) );
- }
- else
- {
- // Otherwise the condition has to be a look-around assertion.
- switch( sy.ext_token( itemp = iw.ipat, m_pat->end() ) )
- {
- case EXT_POS_LOOKAHEAD:
- case EXT_NEG_LOOKAHEAD:
- case EXT_POS_LOOKBEHIND:
- case EXT_NEG_LOOKBEHIND:
- {
- // Parse the assertion as a group of its own, then pass it
- // to the conditional; ownership is released only after the
- // conditional has been created.
- std::auto_ptr<detail::match_group_base<CI> > pgroup_tmp(
- _find_next_group( iw, NULL, sy, rggroups ) );
- detail::assign_auto_ptr(
- pgroup, detail::create_assertion_conditional<CI>(
- _get_next_group_nbr(), pgroup_tmp.get(), m_arena ) );
- pgroup_tmp.release();
- }
- break;
- default:
- throw bad_regexpr( "bad extension sequence" );
- }
- }
- }
- break;
- case EXT_COMMENT:
- // Skip everything up to the closing END_GROUP token.
- while( END_GROUP != ( tok = sy.reg_token( iw.ipat, m_pat->end() ) ) )
- {
- if( NO_TOKEN == tok && m_pat->end() != iw.ipat )
- ++iw.ipat;
- if( m_pat->end() == iw.ipat )
- throw bad_regexpr( "Expecting end of comment" );
- }
- break;
- default:
- throw bad_regexpr( "bad extension sequence" );
- }
- }
- else
- {
- // Skip over the END_GROUP token
- iw.ipat = itemp;
- }
- }
- else
- {
- // No extension: an ordinary, visible group.
- detail::assign_auto_ptr( pgroup, new( m_arena ) detail::match_group<CI>( _get_next_group_nbr(), m_arena ) );
- ++m_cgroups_visible;
- }
- if( NULL != pgroup.get() )
- {
- // Parse the contents of the group.
- pgroup->add_alternate();
- while( _find_next( iw, pgroup.get(), sy, rggroups ) );
- pgroup->end_alternate();
- // if this is a conditional group, then there must be at
- // most 2 alternates.
- if( fconditional && 2 < pgroup->calternates() )
- throw bad_regexpr( "Too many alternates in conditional subexpression" );
- // Add this group to the rggroups array
- if( size_t( -1 ) != pgroup->group_number() )
- {
- if( pgroup->group_number() >= rggroups.size() )
- rggroups.resize( pgroup->group_number() + 1, NULL );
- rggroups[ pgroup->group_number() ] = pgroup.get();
- }
- // tell this group how many groups are contained within it
- pgroup->set_extent( detail::extent( extent_start, m_cgroups - extent_start ) );
- // If this is not a pattern modifier, restore the
- // flags to their previous settings. This causes
- // pattern modifiers to have the scope of their
- // enclosing group.
- sy.set_flags( old_flags );
- }
- return pgroup.release();
- }
- namespace detail
- {
- // If we reached the end of the string before finding the end of the
- // character set, then this is an ill-formed regex
- template< typename CI >
- inline void check_iter( CI icur, CI istop )
- {
- // Running out of input at this point means the character set was
- // never closed.
- bool const fat_end = ( istop == icur );
- if( fat_end )
- {
- throw bad_regexpr( "expecting end of character set" );
- }
- }
- // Decodes the escape sequence whose first character (the one following
- // the escape character) is at icur, and returns the character it denotes.
- //
- // icur       in/out: advanced past everything that was consumed.
- // iend       end of the pattern. check_iter throws bad_regexpr when the
- //            input is exhausted where more is required.
- // normalize  when false, the letters a f n r t v and the backslash are not
- //            translated; like any unrecognized character they stand for
- //            themselves.
- //
- // Octal escapes take at most three digits in total and hex escapes at most
- // two; the counter i enforces the limit on the digits read by the loops.
- template< typename II, typename CI >
- inline typename std::iterator_traits<CI>::value_type get_escaped_char( II & icur, CI iend, bool normalize )
- {
- typedef typename std::iterator_traits<CI>::value_type CH;
- CH ch = 0, i;
- check_iter<CI>( icur, iend );
- switch( *icur )
- {
- // octal escape sequence
- case REGEX_CHAR(CH,'0'): case REGEX_CHAR(CH,'1'): case REGEX_CHAR(CH,'2'): case REGEX_CHAR(CH,'3'):
- case REGEX_CHAR(CH,'4'): case REGEX_CHAR(CH,'5'): case REGEX_CHAR(CH,'6'): case REGEX_CHAR(CH,'7'):
- ch = CH( *icur++ - REGEX_CHAR(CH,'0') );
- // up to two further digits; i counts them so the limit is honoured
- for( i=0; i<2 && REGEX_CHAR(CH,'0') <= *icur && REGEX_CHAR(CH,'7') >= *icur; ++i, check_iter<CI>( ++icur, iend ) )
- ch = CH( ch * 8 + ( *icur - REGEX_CHAR(CH,'0') ) );
- break;
- // bell character
- case REGEX_CHAR(CH,'a'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\a');
- ++icur;
- break;
- // control character
- case REGEX_CHAR(CH,'c'):
- check_iter<CI>( ++icur, iend );
- ch = *icur++;
- if( REGEX_CHAR(CH,'a') <= ch && REGEX_CHAR(CH,'z') >= ch )
- ch = detail::regex_toupper( ch );
- ch ^= 0x40;
- break;
- // escape character
- case REGEX_CHAR(CH,'e'):
- ch = 27;
- ++icur;
- break;
- // formfeed character
- case REGEX_CHAR(CH,'f'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\f');
- ++icur;
- break;
- // newline
- case REGEX_CHAR(CH,'n'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\n');
- ++icur;
- break;
- // return
- case REGEX_CHAR(CH,'r'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\r');
- ++icur;
- break;
- // horizontal tab
- case REGEX_CHAR(CH,'t'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\t');
- ++icur;
- break;
- // vertical tab
- case REGEX_CHAR(CH,'v'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\v');
- ++icur;
- break;
- // hex escape sequence
- case REGEX_CHAR(CH,'x'):
- // at most two hex digits; i counts them so the limit is honoured
- for( ++icur, ch=i=0; i<2 && detail::regex_isxdigit( *icur ); ++i, check_iter<CI>( ++icur, iend ) )
- ch = CH( ch * 16 + detail::regex_xdigit2int( *icur ) );
- break;
- // backslash
- case REGEX_CHAR(CH,'\\'):
- if( ! normalize )
- goto default_;
- ch = REGEX_CHAR(CH,'\\');
- ++icur;
- break;
- // all other escaped characters represent themselves
- default: default_:
- ch = *icur;
- ++icur;
- break;
- }
- return ch;
- }
- template< typename CH, typename CS, typename SY >
- inline void parse_charset( // parses a character-set body starting at icur into *pnew; icur is left just past the CHARSET_END token
- std::auto_ptr<CS> & pnew,
- typename std::basic_string<CH>::iterator & icur,
- typename std::basic_string<CH>::const_iterator iend,
- SY & sy )
- {
- typedef CH char_type;
- typedef std::basic_string<CH> string_type;
- typedef typename string_type::const_iterator CI;
- typename string_type::iterator itemp = icur;
- bool const normalize = ( NORMALIZE == ( NORMALIZE & sy.get_flags() ) );
- if( iend != itemp && CHARSET_NEGATE == sy.charset_token( itemp, iend ) ) // a leading negate token complements the whole set
- {
- pnew->m_fcompliment = true;
- icur = itemp; // consume the negate token
- }
- TOKEN tok;
- char_type ch_prev = 0; // pending character: read but not yet added, since it may turn out to be the lower bound of a range
- bool fhave_prev = false; // true while ch_prev holds such a pending character
- charset const * pcharset = NULL;
- typename string_type::iterator iprev = icur; // position at which the current token started, used to un-get it
- bool const fnocase = ( NOCASE == ( NOCASE & sy.get_flags() ) );
- check_iter<CI>( icur, iend ); // throws bad_regexpr if the set is empty and unterminated
- // remember the current position and grab the next token
- tok = sy.charset_token( icur, iend );
- do
- {
- check_iter<CI>( icur, iend );
- if( CHARSET_RANGE == tok && fhave_prev ) // a range token right after a pending character: try to read an upper bound
- {
- // remember the current position
- typename string_type::iterator iprev2 = icur;
- fhave_prev = false;
- // ch_prev is lower bound of a range
- switch( sy.charset_token( icur, iend ) )
- {
- case CHARSET_RANGE:
- case CHARSET_NEGATE:
- icur = iprev2; // un-get these tokens and fall through
- case NO_TOKEN:
- pnew->set_bit_range( ch_prev, *icur++, fnocase ); // upper bound is a plain character
- continue;
- case CHARSET_ESCAPE: // BUGBUG user-defined charset?
- pnew->set_bit_range( ch_prev, get_escaped_char( icur, iend, normalize ), fnocase );
- continue;
- case CHARSET_BACKSPACE:
- pnew->set_bit_range( ch_prev, char_type( 8 ), fnocase ); // backspace
- continue;
- case CHARSET_END: // fall through
- default: // not a range.
- icur = iprev; // backup to range token
- pnew->set_bit( ch_prev, fnocase );
- pnew->set_bit( *icur++, fnocase ); // the range character itself becomes a member of the set
- continue;
- }
- }
- if( fhave_prev ) // the pending character was not a range bound after all: add it on its own
- pnew->set_bit( ch_prev, fnocase );
- fhave_prev = false;
- switch( tok )
- {
- // None of the intrinsic charsets are case-sensitive,
- // so no special handling must be done when the NOCASE
- // flag is set.
- case CHARSET_RANGE:
- case CHARSET_NEGATE:
- case CHARSET_END:
- icur = iprev; // un-get these tokens
- ch_prev = *icur++; // and treat the first character as an ordinary, pending one
- fhave_prev = true;
- continue;
- case CHARSET_BACKSPACE:
- ch_prev = char_type( 8 ); // backspace
- fhave_prev = true;
- continue;
- case ESC_DIGIT:
- *pnew |= intrinsic_charsets<char_type>::get_digit_charset();
- continue;
- case ESC_NOT_DIGIT:
- *pnew |= intrinsic_charsets<char_type>::get_not_digit_charset();
- continue;
- case ESC_SPACE:
- *pnew |= intrinsic_charsets<char_type>::get_space_charset();
- continue;
- case ESC_NOT_SPACE:
- *pnew |= intrinsic_charsets<char_type>::get_not_space_charset();
- continue;
- case ESC_WORD:
- *pnew |= intrinsic_charsets<char_type>::get_word_charset();
- continue;
- case ESC_NOT_WORD:
- *pnew |= intrinsic_charsets<char_type>::get_not_word_charset();
- continue;
- case CHARSET_ALNUM: // from here on: named classes are OR-ed into m_posixcharson, their negations appended to m_posixcharsoff
- pnew->m_posixcharson |= ( _ALNUM );
- continue;
- case CHARSET_NOT_ALNUM:
- pnew->m_posixcharsoff.push_back( _ALNUM );
- continue;
- case CHARSET_ALPHA:
- pnew->m_posixcharson |= ( _ALPHA );
- continue;
- case CHARSET_NOT_ALPHA:
- pnew->m_posixcharsoff.push_back( _ALPHA );
- continue;
- case CHARSET_BLANK:
- pnew->m_posixcharson |= ( _BLANK );
- continue;
- case CHARSET_NOT_BLANK:
- pnew->m_posixcharsoff.push_back( _BLANK );
- continue;
- case CHARSET_CNTRL:
- pnew->m_posixcharson |= ( _CONTROL );
- continue;
- case CHARSET_NOT_CNTRL:
- pnew->m_posixcharsoff.push_back( _CONTROL );
- continue;
- case CHARSET_DIGIT:
- pnew->m_posixcharson |= ( _DIGIT );
- continue;
- case CHARSET_NOT_DIGIT:
- pnew->m_posixcharsoff.push_back( _DIGIT );
- continue;
- case CHARSET_GRAPH:
- pnew->m_posixcharson |= ( _GRAPH );
- continue;
- case CHARSET_NOT_GRAPH:
- pnew->m_posixcharsoff.push_back( _GRAPH );
- continue;
- case CHARSET_LOWER:
- if( NOCASE == ( NOCASE & sy.get_flags() ) ) // with NOCASE, "lower" covers both cases
- pnew->m_posixcharson |= ( _LOWER|_UPPER );
- else
- pnew->m_posixcharson |= ( _LOWER );
- continue;
- case CHARSET_NOT_LOWER:
- if( NOCASE == ( NOCASE & sy.get_flags() ) )
- pnew->m_posixcharsoff.push_back( _LOWER|_UPPER );
- else
- pnew->m_posixcharsoff.push_back( _LOWER );
- continue;
- case CHARSET_PRINT:
- pnew->m_posixcharson |= ( _PRINT );
- continue;
- case CHARSET_NOT_PRINT:
- pnew->m_posixcharsoff.push_back( _PRINT );
- continue;
- case CHARSET_PUNCT:
- pnew->m_posixcharson |= ( _PUNCT );
- continue;
- case CHARSET_NOT_PUNCT:
- pnew->m_posixcharsoff.push_back( _PUNCT );
- continue;
- case CHARSET_SPACE:
- pnew->m_posixcharson |= ( _SPACE );
- continue;
- case CHARSET_NOT_SPACE:
- pnew->m_posixcharsoff.push_back( _SPACE );
- continue;
- case CHARSET_UPPER:
- if( NOCASE == ( NOCASE & sy.get_flags() ) ) // with NOCASE, "upper" covers both cases
- pnew->m_posixcharson |= ( _UPPER|_LOWER );
- else
- pnew->m_posixcharson |= ( _UPPER );
- continue;
- case CHARSET_NOT_UPPER:
- if( NOCASE == ( NOCASE & sy.get_flags() ) )
- pnew->m_posixcharsoff.push_back( _UPPER|_LOWER );
- else
- pnew->m_posixcharsoff.push_back( _UPPER );
- continue;
- case CHARSET_XDIGIT:
- pnew->m_posixcharson |= ( _HEX );
- continue;
- case CHARSET_NOT_XDIGIT:
- pnew->m_posixcharsoff.push_back( _HEX );
- continue;
- case CHARSET_ESCAPE:
- // Maybe this is a user-defined intrinsic charset
- pcharset = get_altern_charset( *icur, sy );
- if( NULL != pcharset )
- {
- *pnew |= *pcharset;
- ++icur; // consume the character that named the charset
- continue;
- }
- else
- {
- ch_prev = get_escaped_char( icur, iend, normalize ); // ordinary escape: becomes the pending character
- fhave_prev = true;
- }
- continue;
- default:
- ch_prev = *icur++; // plain character: becomes the pending character
- fhave_prev = true;
- continue;
- }
- }
- while( check_iter<CI>( iprev = icur, iend ), // remember where the next token starts; throws if the set is unterminated
- CHARSET_END != ( tok = sy.charset_token( icur, iend ) ) );
- if( fhave_prev ) // flush a character still pending when the set ended
- pnew->set_bit( ch_prev, fnocase );
- pnew->optimize( type2type<char_type>() );
- }
- template< typename CH, typename SY >
- inline charset const * get_altern_charset( CH ch, SY & sy ) // returns the charset registered under ch in sy.s_charset_map (built on first use), or NULL if ch is not registered
- {
- typedef std::basic_string<CH> string_type;
- charset const * pcharset = NULL;
- regex::detail::charset_map<CH> & charset_map = sy.s_charset_map;
- typename regex::detail::charset_map<CH>::iterator iter = charset_map.find( ch );
- if( charset_map.end() != iter )
- {
- bool const fnocase = ( NOCASE == ( sy.get_flags() & NOCASE ) );
- pcharset = iter->second.m_rgcharsets[ fnocase ]; // one slot per case mode, indexed by fnocase
- if( NULL == pcharset ) // not built yet for this case mode: parse it now from the stored string
- {
- // tmp takes ownership of any ptrs.
- charset_map_node<CH> tmp = iter->second;
- charset_map.erase( iter ); // prevent possible infinite recursion
- typename string_type::iterator istart = tmp.m_str.begin();
- std::auto_ptr<charset> pnew( new charset );
- std::auto_ptr<charset const> pold( tmp.m_rgcharsets[ !fnocase ] ); // guards the other mode's charset until the node is back in the map
- parse_charset<CH, charset>( pnew, istart, tmp.m_str.end(), sy );
- tmp.m_rgcharsets[ fnocase ] = pcharset = pnew.get();
- charset_map[ ch ] = tmp; // could throw
- // charset_map has taken ownership of these pointers now.
- pnew.release();
- pold.release();
- }
- }
- return pcharset;
- }
- } // namespace detail
- //
- // Read ahead through the pattern and treat sequential atoms
- // as a single atom, making sure to handle quantification
- // correctly. Warning: dense code ahead.
- //
- template< typename CI, typename SY >
- inline void basic_rpattern_base<CI, SY>::_find_atom( // collects a run of plain characters starting at iw.ipat and adds it to pgroup as a literal, splitting off a quantified last character
- iter_wrap & iw,
- detail::match_group_base<CI> * pgroup,
- syntax_type & sy )
- {
- typename string_type::iterator itemp = iw.ipat, istart;
- size_t const nstart = std::distance( m_pat->begin(), iw.ipat ); // start of the run kept as an offset: the erase() below invalidates iterators
- do
- {
- if( itemp != iw.ipat ) // Is there whitespace to skip?
- {
- size_t dist = std::distance( m_pat->begin(), iw.ipat );
- m_pat->erase( iw.ipat, itemp ); // erase the whitespace from the pattern
- std::advance( iw.ipat = m_pat->begin(), dist ); // re-derive the cursor from its offset after the erase
- if( m_pat->end() == ( itemp = iw.ipat ) ) // are we at the end of the pattern?
- break;
- }
- switch( sy.quant_token( itemp, m_pat->end() ) ) // look for a quantifier at itemp (iw.ipat itself is not passed)
- {
- // if {, } can't be interpreted as quantifiers, treat them as regular chars
- case BEGIN_RANGE:
- std::advance( istart = m_pat->begin(), nstart );
- if( istart != iw.ipat ) // treat as a quantifier
- goto quantify;
- case NO_TOKEN: // (BEGIN_RANGE at the very start of the run falls through to here)
- case END_RANGE:
- case END_RANGE_MIN:
- case RANGE_SEPARATOR:
- break;
- default:
- std::advance( istart = m_pat->begin(), nstart );
- if( istart == iw.ipat ) // must be able to quantify something.
- throw bad_regexpr( "quantifier not expected" );
- quantify: if( istart != --iw.ipat ) // step back to the last character; anything before it becomes its own literal
- pgroup->add_item( detail::create_literal<CI>( istart, iw.ipat, sy.get_flags(), m_arena ) );
- std::auto_ptr<detail::sub_expr<CI> > pnew( detail::create_char<CI>( *iw.ipat++, sy.get_flags(), m_arena ) ); // the quantifier applies to this single character
- _quantify( pnew, iw, false, sy );
- pgroup->add_item( pnew.release() );
- return;
- }
- } while( m_pat->end() != ++iw.ipat && ! sy.reg_token( itemp = iw.ipat, m_pat->end() ) ); // extend the run until the pattern ends or a regular token is found
- std::advance( istart = m_pat->begin(), nstart );
- assert( iw.ipat != istart );
- pgroup->add_item( detail::create_literal<CI>( istart, iw.ipat, sy.get_flags(), m_arena ) ); // no quantifier: the whole run is one literal
- }
- template< typename CI, typename SY >
- inline bool basic_rpattern_base<CI, SY>::_find_next(
- iter_wrap & iw,
- detail::match_group_base<CI> * pgroup,
- syntax_type & sy,
- std::vector<detail::match_group_base<CI>*> & rggroups )
- {
- typedef char_type CH;
- std::auto_ptr<detail::sub_expr<CI> > pnew;
- std::auto_ptr<detail::custom_charset> pcs;
- typename string_type::iterator istart, itemp;
- bool fdone, is_group = false;
- bool const normalize = ( NORMALIZE == ( NORMALIZE & sy.get_flags() ) );
- if( m_pat->end() == iw.ipat )
- {
- if( 0 != pgroup->group_number() )
- throw bad_regexpr( "mismatched parenthesis" );
- return false;
- }
- switch( sy.reg_token( iw.ipat, m_pat->end() ) )
- {
- case NO_TOKEN: // not a token. Must be an atom
- if( m_pat->end() == iw.ipat )
- {
- if( 0 != pgroup->group_number() )
- throw bad_regexpr( "mismatched parenthesis" );
- return false;
- }
- _find_atom( iw, pgroup, sy );
- return true;
- case END_GROUP:
- if( 0 == pgroup->group_number() )
- throw bad_regexpr( "mismatched parenthesis" );
- return false;
- case ALTERNATION:
- pgroup->end_alternate();
- pgroup->add_alternate();
- return true;
- case BEGIN_GROUP:
- // Find next group. could return NULL if the group is really
- // a pattern modifier, like: ( ?s-i )
- detail::assign_auto_ptr( pnew, _find_next_group( iw, pgroup, sy, rggroups ) );
- is_group = true;
- break;
- case BEGIN_LINE:
- detail::assign_auto_ptr( pnew, detail::create_bol<CI>( sy.get_flags(), m_arena ) );
- break;
- case END_LINE:
- detail::assign_auto_ptr( pnew, detail::create_eol<CI>( sy.get_flags(), m_arena ) );
- break;
- case BEGIN_CHARSET:
- detail::assign_auto_ptr( pcs, new( m_arena ) detail::custom_charset( m_arena ) );
- detail::parse_charset<char_type, detail::custom_charset>(
- pcs, iw.ipat, m_pat->end(), sy );
- detail::assign_auto_ptr( pnew,
- detail::create_custom_charset<CI>( pcs.get(), sy.get_flags(), m_arena ) );
- pcs.release();
- break;
- case MATCH_ANY:
- detail::assign_auto_ptr( pnew, detail::create_any<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESC_WORD_BOUNDARY:
- detail::assign_auto_ptr( pnew, detail::create_word_boundary<CI>( true, sy.get_flags(), m_arena ) );
- break;
- case ESC_NOT_WORD_BOUNDARY:
- detail::assign_auto_ptr( pnew, detail::create_word_boundary<CI>( false, sy.get_flags(), m_arena ) );
- break;
- case ESC_WORD_START:
- detail::assign_auto_ptr( pnew, detail::create_word_start<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESC_WORD_STOP:
- detail::assign_auto_ptr( pnew, detail::create_word_stop<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESC_DIGIT:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_digit_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_NOT_DIGIT:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_not_digit_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_WORD:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_word_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_NOT_WORD:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_not_word_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_SPACE:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_space_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_NOT_SPACE:
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( detail::intrinsic_charsets<char_type>::get_not_space_charset(), sy.get_flags(), m_arena ) );
- break;
- case ESC_BEGIN_STRING:
- detail::assign_auto_ptr( pnew, detail::create_bos<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESC_END_STRING:
- detail::assign_auto_ptr( pnew, detail::create_eos<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESC_END_STRING_z:
- detail::assign_auto_ptr( pnew, detail::create_eoz<CI>( sy.get_flags(), m_arena ) );
- break;
- case ESCAPE:
- if( m_pat->end() == iw.ipat )
- {
- // BUGBUG what if the escape sequence is more that 1 character?
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( *--iw.ipat, sy.get_flags(), m_arena ) );
- ++iw.ipat;
- }
- else if( REGEX_CHAR(CH,'0') <= *iw.ipat && REGEX_CHAR(CH,'9') >= *iw.ipat )
- {
- // Parse at most 3 decimal digits.
- size_t nbackref = detail::parse_int( itemp = iw.ipat, m_pat->end(), 999 );
- // If the resulting number could conceivably be a backref, then it is.
- if( REGEX_CHAR(CH,'0') != *iw.ipat && ( 10 > nbackref || nbackref < _cgroups_total() ) )
- {
- detail::assign_auto_ptr( pnew, detail::create_backref<CI>( nbackref, sy.get_flags(), m_arena ) );
- iw.ipat = itemp;
- }
- else
- {
- // It's an octal character escape sequence. If *ipat is 8 or 9, insert
- // a NULL character, and leave the 8 or 9 as a character literal.
- char_type ch = 0, i = 0;
- for( ; i < 3 && m_pat->end() != iw.ipat && REGEX_CHAR(CH,'0') <= *iw.ipat && REGEX_CHAR(CH,'7') >= *iw.ipat; ++i, ++iw.ipat )
- ch = CH( ch * 8 + ( *iw.ipat - REGEX_CHAR(CH,'0') ) );
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( ch, sy.get_flags(), m_arena ) );
- }
- }
- else if( REGEX_CHAR(CH,'e') == *iw.ipat )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( CH( 27 ), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'x') == *iw.ipat )
- {
- char_type ch = 0, i = 0;
- for( ++iw.ipat; i < 2 && m_pat->end() != iw.ipat && detail::regex_isxdigit( *iw.ipat ); ++i, ++iw.ipat )
- ch = CH( ch * 16 + detail::regex_xdigit2int( *iw.ipat ) );
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( ch, sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'c') == *iw.ipat )
- {
- if( m_pat->end() == ++iw.ipat )
- throw bad_regexpr( "incomplete escape sequence \c" );
- char_type ch = *iw.ipat++;
- if( REGEX_CHAR(CH,'a') <= ch && REGEX_CHAR(CH,'z') >= ch )
- ch = detail::regex_toupper( ch );
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( CH( ch ^ 0x40 ), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'a') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'a'), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'f') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'f'), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'n') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'n'), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'r') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'r'), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'t') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'t'), sy.get_flags(), m_arena ) );
- }
- else if( REGEX_CHAR(CH,'\') == *iw.ipat && normalize )
- {
- ++iw.ipat;
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( REGEX_CHAR(CH,'\'), sy.get_flags(), m_arena ) );
- }
- else
- {
- // Is this a user-defined intrinsic character set?
- detail::charset const * pcharset = detail::get_altern_charset( *iw.ipat, sy );
- if( NULL != pcharset )
- detail::assign_auto_ptr( pnew, detail::create_charset<CI>( *pcharset, sy.get_flags(), m_arena ) );
- else
- detail::assign_auto_ptr( pnew, detail::create_char<CI>( *iw.ipat, sy.get_flags(), m_arena ) );
- ++iw.ipat;
- }
- break;
- // If quotemeta, loop until we find quotemeta off or end of string
- case ESC_QUOTE_META_ON:
- for( istart = itemp = iw.ipat, fdone = false; !fdone && m_pat->end() != iw.ipat; )
- {
- switch( sy.reg_token( iw.ipat, m_pat->end() ) )
- {
- case ESC_QUOTE_META_OFF:
- fdone = true;
- break;
- case NO_TOKEN:
- if( m_pat->end() != iw.ipat )
- ++iw.ipat; // fallthrough
- default:
- itemp = iw.ipat;
- break;
- }
- }
- if( itemp != istart )
- pgroup->add_item( detail::create_literal<CI>( istart, itemp, sy.get_flags(), m_arena ) );
- // skip the quantification code below
- return true;
- // Should never get here for valid patterns
- case ESC_QUOTE_META_OFF:
- throw bad_regexpr( "quotemeta turned off, but was never turned on" );
- default:
- assert( ! "Unhandled token type" );
- break;
- }
- // If pnew is null, then the current subexpression is a no-op.
- if( pnew.get() )
- {
- // Look for quantifiers
- _quantify( pnew, iw, is_group, sy );
- // Add the item to the group
- pgroup->add_item( pnew.release() );
- }
- return true;
- }
- template< typename CI, typename SY >
- inline void basic_rpattern_base<CI, SY>::_quantify(
- std::auto_ptr<detail::sub_expr<CI> > & pnew,
- iter_wrap & iw,
- bool is_group,
- syntax_type & sy )
- {
- if( m_pat->end() != iw.ipat && ! pnew->is_assertion() )
- {
- typename string_type::iterator itemp = iw.ipat, itemp2;
- bool fmin = false;
- // Since size_t is unsigned, -1 is really the largest size_t
- size_t lbound = ( size_t )-1;
- size_t ubound = ( size_t )-1;
- size_t ubound_tmp;
- switch( sy.quant_token( itemp, m_pat->end() ) )
- {
- case ZERO_OR_MORE_MIN:
- fmin = true;
- case ZERO_OR_MORE:
- lbound = 0;
- break;
- case ONE_OR_MORE_MIN:
- fmin = true;
- case ONE_OR_MORE:
- lbound = 1;
- break;
- case ZERO_OR_ONE_MIN:
- fmin = true;
- case ZERO_OR_ONE:
- lbound = 0;
- ubound = 1;
- break;
- case BEGIN_RANGE:
- lbound = detail::parse_int( itemp, m_pat->end() );
- if( m_pat->end() == itemp )
- return; // not a valid quantifier - treat as atom
- switch( sy.quant_token( itemp, m_pat->end() ) )
- {
- case END_RANGE_MIN:
- fmin = true;
- case END_RANGE:
- ubound = lbound;
- break;
- case RANGE_SEPARATOR:
- itemp2 = itemp;
- ubound_tmp = detail::parse_int( itemp, m_pat->end() );
- if( itemp != itemp2 )
- ubound = ubound_tmp;
- if( itemp == m_pat->end() )
- return; // not a valid quantifier - treat as atom
- switch( sy.quant_token( itemp, m_pat->end() ) )
- {
- case END_RANGE_MIN:
- fmin = true;
- case END_RANGE:
- break;
- default:
- return; // not a valid quantifier - treat as atom
- }
- break;
- default:
- return; // not a valid quantifier - treat as atom
- }
- if( ubound < lbound )
- throw bad_regexpr( "Can't do {n, m} with n > m" );
- break;
- }
- if( ( size_t )-1 != lbound )
- {
- // If we are quantifying a group, then this pattern could recurse
- // deeply. Note that fact here so that we can opt to use a stack-
- // conservative algorithm at match time.
- if( is_group && ubound > 16 )
- m_fok_to_recurse = false;
- std::auto_ptr<detail::sub_expr<CI> > pquant( pnew->quantify( lbound, ubound, ! fmin, m_arena ) );
- pnew.release();
- detail::assign_auto_ptr( pnew, pquant.release() );
- iw.ipat = itemp;
- }
- }
- }
- template< typename CI, typename SY >
- inline void basic_rpattern_base<CI, SY>::_add_subst_backref(
- detail::subst_node & snode,
- size_t nbackref,
- size_t rstart,
- bool & uses_backrefs,
- detail::subst_list_type & subst_list ) const
- {
- uses_backrefs = true;
- assert( detail::subst_node::SUBST_STRING == snode.stype );
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- snode.stype = detail::subst_node::SUBST_BACKREF;
- snode.subst_backref = nbackref;
- subst_list.push_back( snode );
- // re-initialize the detail::subst_node
- snode.stype = detail::subst_node::SUBST_STRING;
- snode.subst_string.rstart = rstart;
- snode.subst_string.rlength = 0;
- }
- template< typename CI, typename SY >
- inline void basic_rpattern_base<CI, SY>::_parse_subst(
- string_type & subst,
- bool & uses_backrefs,
- detail::subst_list_type & subst_list ) const
- {
- TOKEN tok;
- detail::subst_node snode;
- typename string_type::iterator icur = subst.begin();
- size_t nbackref;
- typename string_type::iterator itemp;
- bool fdone;
- syntax_type sy( m_flags );
- uses_backrefs = false;
- // Initialize the subst_node
- snode.stype = detail::subst_node::SUBST_STRING;
- snode.subst_string.rstart = 0;
- snode.subst_string.rlength = 0;
- while( subst.end() != icur )
- {
- switch( tok = sy.subst_token( icur, subst.end() ) )
- {
- case SUBST_MATCH:
- _add_subst_backref( snode, 0, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
- break;
- case SUBST_PREMATCH:
- _add_subst_backref( snode, ( size_t )detail::subst_node::PREMATCH, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
- break;
- case SUBST_POSTMATCH:
- _add_subst_backref( snode, ( size_t )detail::subst_node::POSTMATCH, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
- break;
- case SUBST_BACKREF:
- nbackref = detail::parse_int( icur, subst.end(), cgroups() - 1 ); // always at least 1 group
- if( 0 == nbackref )
- throw bad_regexpr( "invalid backreference in substitution" );
- _add_subst_backref( snode, nbackref, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
- break;
- case SUBST_QUOTE_META_ON:
- assert( detail::subst_node::SUBST_STRING == snode.stype );
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- snode.subst_string.rstart = std::distance( subst.begin(), icur );
- for( itemp = icur, fdone = false; !fdone && subst.end() != icur; )
- {
- switch( tok = sy.subst_token( icur, subst.end() ) )
- {
- case SUBST_ALL_OFF:
- fdone = true;
- break;
- case NO_TOKEN:
- ++icur; // fall-through
- default:
- itemp = icur;
- break;
- }
- }
- snode.subst_string.rlength = std::distance( subst.begin(), itemp ) - snode.subst_string.rstart;
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- if( tok == SUBST_ALL_OFF )
- {
- snode.stype = detail::subst_node::SUBST_OP;
- snode.op = detail::subst_node::ALL_OFF;
- subst_list.push_back( snode );
- }
- // re-initialize the subst_node
- snode.stype = detail::subst_node::SUBST_STRING;
- snode.subst_string.rstart = std::distance( subst.begin(), icur );
- snode.subst_string.rlength = 0;
- break;
- case SUBST_UPPER_ON:
- case SUBST_UPPER_NEXT:
- case SUBST_LOWER_ON:
- case SUBST_LOWER_NEXT:
- case SUBST_ALL_OFF:
- assert( detail::subst_node::SUBST_STRING == snode.stype );
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- snode.stype = detail::subst_node::SUBST_OP;
- snode.op = ( detail::subst_node::op_type ) tok;
- subst_list.push_back( snode );
- // re-initialize the subst_node
- snode.stype = detail::subst_node::SUBST_STRING;
- snode.subst_string.rstart = std::distance( subst.begin(), icur );
- snode.subst_string.rlength = 0;
- break;
- case SUBST_ESCAPE:
- if( subst.end() == icur )
- throw bad_regexpr( "expecting escape sequence in substitution string" );
- assert( detail::subst_node::SUBST_STRING == snode.stype );
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- snode.subst_string.rstart = std::distance( subst.begin(), icur++ );
- snode.subst_string.rlength = 1;
- break;
- case NO_TOKEN:
- default:
- ++snode.subst_string.rlength;
- ++icur;
- break;
- }
- }
- assert( detail::subst_node::SUBST_STRING == snode.stype );
- if( snode.subst_string.rlength )
- subst_list.push_back( snode );
- }
- template< typename CH >
- REGEXPR_H_INLINE void reset_intrinsic_charsets( CH )
- {
- detail::intrinsic_charsets<CH>::reset();
- }
- typedef ::regex::detail::select
- <
- REGEX_FOLD_INSTANTIATIONS &&
- detail::is_convertible<char const *,std::string::const_iterator>::value,
- std::string::const_iterator,
- char const *
- >::type lpcstr_t;
- typedef ::regex::detail::select
- <
- REGEX_FOLD_INSTANTIATIONS &&
- detail::is_convertible<wchar_t const *,std::wstring::const_iterator>::value,
- std::wstring::const_iterator,
- wchar_t const *
- >::type lpcwstr_t;
- namespace
- {
// Used to fake the compiler into implicitly instantiating the templates we need.
// It has static storage duration and no initializer, so it starts out false;
// the instantiator code below is guarded by it.
bool g_regex_false;
- template< typename CI, typename SY >
- struct rpattern_instantiator
- {
- typedef ::regex::basic_rpattern<CI,SY> rpattern_type;
- typedef ::regex::basic_match_results<CI> results_type;
- typedef typename rpattern_type::char_type char_type;
- typedef typename rpattern_type::string_type string_type;
- rpattern_instantiator()
- {
- if( g_regex_false )
- {
- string_type const str;
- CI ci = CI();
- results_type res;
- rpattern_type pat;
- rpattern_type pat1( str );
- rpattern_type pat2( str, str );
- rpattern_type pat3( pat );
- pat3 = pat;
- pat.init( str );
- pat.init( str, str );
- pat.set_substitution( str );
- //pat.match( &*ci, res ); // could cause a static assert
- pat.match( ci, ci, res );
- //pat.count( &*ci ); // could cause a static assert
- pat.count( ci, ci );
- reset_intrinsic_charsets( char_type() );
- // These force VC6 to create COMDATs for set_substitution and the two init methods
- void (*preset)( char_type ) = & reset_intrinsic_charsets;
- (*preset)( char_type() );
- void (rpattern_type::*psetsub)( string_type const & ) = & rpattern_type::set_substitution;
- (pat.*psetsub)( str );
- void (rpattern_type::*pinit1)( string_type const &, REGEX_FLAGS, REGEX_MODE ) = & rpattern_type::init;
- (pat.*pinit1)( str, NOFLAGS, MODE_DEFAULT );
- void (rpattern_type::*pinit2)( string_type const &, string_type const &, REGEX_FLAGS, REGEX_MODE ) = & rpattern_type::init;
- (pat.*pinit2)( str, str, NOFLAGS, MODE_DEFAULT );
- }
- }
- };
// Here is a rudimentary typelist facility to allow the REGEX_TO_INSTANTIATE
// list to recursively generate the instantiations we are interested in.

// Terminator of a typelist; declared but never defined.
struct null_type;

// One list cell: head is the first type, tail is the rest of the list
// (another cons, or null_type at the end).
template< typename H, typename T >
struct cons
{
    typedef H head;
    typedef T tail;
};

// typelist<T1,...,Tn>::type is cons<T1, cons<T2, ... cons<Tn, null_type> > >
// for up to eight types. Unused trailing parameters default to null_type.
template< typename T1=null_type, typename T2=null_type, typename T3=null_type, typename T4=null_type,
          typename T5=null_type, typename T6=null_type, typename T7=null_type, typename T8=null_type >
struct typelist
{
    // Peel off T1 and recurse on the remaining types, padded with null_type.
    typedef cons< T1, typename typelist<T2,T3,T4,T5,T6,T7,T8,null_type>::type > type;
};

// The empty list ends the recursion.
template<>
struct typelist<null_type,null_type,null_type,null_type,null_type,null_type,null_type,null_type>
{
    typedef null_type type;
};
- // The recursive_instantiator uses typelists and the rpattern_instantiator
- // to generate instantiations for all the types in the typelist.
- template< typename TYPELIST >
- struct recursive_instantiator
- {
- // The inner struct is needed as a work-around for the lack
- // of partial template specialization.
- template< typename SY >
- struct inner
- {
- inner()
- {
- if( g_regex_false )
- {
- typedef typename TYPELIST::head CI;
- typedef typename ::std::iterator_traits<CI>::value_type char_type;
- typedef typename SY::template rebind<char_type>::other syntax_type;
- rpattern_instantiator<CI,syntax_type> dummy1;
- ( void ) dummy1;
- typedef typename TYPELIST::tail TYPELIST2;
- typedef recursive_instantiator< TYPELIST2 > other;
- typedef typename other::template inner<SY> other_inner;
- other_inner dummy2;
- ( void ) dummy2;
- }
- }
- };
- };
- template<>
- struct recursive_instantiator< null_type >
- {
- template< typename SY >
- struct inner
- {
- };
- };
- // Here is a list of types to instantiate.
- #ifndef REGEX_TO_INSTANTIATE
- # ifdef REGEX_WIDE_AND_NARROW
- # define REGEX_TO_INSTANTIATE std::string::const_iterator,
- std::wstring::const_iterator,
- lpcstr_t,
- lpcwstr_t
- # else
- # define REGEX_TO_INSTANTIATE restring::const_iterator,
- lpctstr_t
- # endif
- #endif
- // Create the perl instantiations
- #ifndef REGEX_NO_PERL
- recursive_instantiator<typelist<REGEX_TO_INSTANTIATE>::type>::inner<perl_syntax<char> > _dummy1;
- #endif
- // Create the posix instantiations
- #ifdef REGEX_POSIX
- recursive_instantiator<typelist<REGEX_TO_INSTANTIATE>::type>::inner<posix_syntax<char> > _dummy2;
- #endif
- } // unnamed namespace
- } // namespace regex