mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			416 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			416 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
| // class template regex -*- C++ -*-
 | |
| 
 | |
| // Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
 | |
| //
 | |
| // This file is part of the GNU ISO C++ Library.  This library is free
 | |
| // software; you can redistribute it and/or modify it under the
 | |
| // terms of the GNU General Public License as published by the
 | |
| // Free Software Foundation; either version 3, or (at your option)
 | |
| // any later version.
 | |
| 
 | |
| // This library is distributed in the hope that it will be useful,
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| // GNU General Public License for more details.
 | |
| 
 | |
| // Under Section 7 of GPL version 3, you are granted additional
 | |
| // permissions described in the GCC Runtime Library Exception, version
 | |
| // 3.1, as published by the Free Software Foundation.
 | |
| 
 | |
| // You should have received a copy of the GNU General Public License and
 | |
| // a copy of the GCC Runtime Library Exception along with this program;
 | |
| // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 | |
| // <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| /**
 | |
|  *  @file bits/regex_nfa.h
 | |
|  *  This is an internal header file, included by other library headers.
 | |
|  *  Do not attempt to use it directly. @headername{regex}
 | |
|  */
 | |
| 
 | |
| namespace std _GLIBCXX_VISIBILITY(default)
 | |
| {
 | |
| namespace __detail
 | |
| {
 | |
| _GLIBCXX_BEGIN_NAMESPACE_VERSION
 | |
| 
 | |
|   /**
 | |
|    * @addtogroup regex-detail
 | |
|    * @{
 | |
|    */
 | |
| 
 | |
|   /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
 | |
|   class _Automaton
 | |
|   {
 | |
|   public:
 | |
|     typedef unsigned int _SizeT;
 | |
| 
 | |
|   public:
 | |
|     virtual
 | |
|     ~_Automaton() { }
 | |
| 
 | |
|     virtual _SizeT
 | |
|     _M_sub_count() const = 0;
 | |
| 
 | |
| #ifdef _GLIBCXX_DEBUG
 | |
|     virtual std::ostream&
 | |
|     _M_dot(std::ostream& __ostr) const = 0;
 | |
| #endif
 | |
|   };
 | |
| 
 | |
|   /// Generic shared pointer to an automaton.  
 | |
|   typedef std::shared_ptr<_Automaton> _AutomatonPtr;
 | |
| 
 | |
|   /// Operation codes that define the type of transitions within the base NFA
 | |
|   /// that represents the regular expression.
 | |
|   enum _Opcode
 | |
|   {
 | |
|       _S_opcode_unknown       =   0,
 | |
|       _S_opcode_alternative   =   1,
 | |
|       _S_opcode_subexpr_begin =   4,
 | |
|       _S_opcode_subexpr_end   =   5,
 | |
|       _S_opcode_match         = 100,
 | |
|       _S_opcode_accept        = 255
 | |
|   };
 | |
| 
 | |
|   /// Provides a generic facade for a templated match_results.
 | |
|   struct _Results
 | |
|   {
 | |
|     virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
 | |
|     virtual void _M_set_matched(int __i, bool __is_matched) = 0;
 | |
|   };
 | |
| 
 | |
|   /// Tags current state (for subexpr begin/end).
 | |
|   typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
 | |
| 
 | |
|   /// Start state tag.
 | |
|   template<typename _FwdIterT, typename _TraitsT>
 | |
|     struct _StartTagger
 | |
|     {
 | |
|       explicit
 | |
|       _StartTagger(int __i)
 | |
|       : _M_index(__i)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       operator()(const _PatternCursor& __pc, _Results& __r)
 | |
|       { __r._M_set_pos(_M_index, 0, __pc); }
 | |
| 
 | |
|       int       _M_index;
 | |
|     };
 | |
| 
 | |
|   /// End state tag.
 | |
|   template<typename _FwdIterT, typename _TraitsT>
 | |
|     struct _EndTagger
 | |
|     {
 | |
|       explicit
 | |
|       _EndTagger(int __i)
 | |
|       : _M_index(__i)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       operator()(const _PatternCursor& __pc, _Results& __r)
 | |
|       { __r._M_set_pos(_M_index, 1, __pc); }
 | |
| 
 | |
|       int       _M_index;
 | |
|       _FwdIterT _M_pos;
 | |
|     };
 | |
| 
 | |
|   /// Indicates if current state matches cursor current.
 | |
|   typedef std::function<bool (const _PatternCursor&)> _Matcher;
 | |
| 
 | |
|   /// Matches any character
 | |
|   inline bool
 | |
|   _AnyMatcher(const _PatternCursor&)
 | |
|   { return true; }
 | |
| 
 | |
|   /// Matches a single character
 | |
|   template<typename _InIterT, typename _TraitsT>
 | |
|     struct _CharMatcher
 | |
|     {
 | |
|       typedef typename _TraitsT::char_type char_type;
 | |
| 
 | |
|       explicit
 | |
|       _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
 | |
|       : _M_traits(__t), _M_c(_M_traits.translate(__c))
 | |
|       { }
 | |
| 
 | |
|       bool
 | |
|       operator()(const _PatternCursor& __pc) const
 | |
|       {
 | |
| 	typedef const _SpecializedCursor<_InIterT>& _CursorT;
 | |
| 	_CursorT __c = static_cast<_CursorT>(__pc);
 | |
| 	return _M_traits.translate(__c._M_current()) == _M_c;
 | |
|       }
 | |
| 
 | |
|       const _TraitsT& _M_traits;
 | |
|       char_type       _M_c;
 | |
|     };
 | |
| 
 | |
|   /// Matches a character range (bracket expression)
 | |
|   template<typename _InIterT, typename _TraitsT>
 | |
|     struct _RangeMatcher
 | |
|     {
 | |
|       typedef typename _TraitsT::char_type _CharT;
 | |
|       typedef std::basic_string<_CharT>    _StringT;
 | |
| 
 | |
|       explicit
 | |
|       _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
 | |
|       : _M_traits(__t), _M_is_non_matching(__is_non_matching)
 | |
|       { }
 | |
| 
 | |
|       bool
 | |
|       operator()(const _PatternCursor& __pc) const
 | |
|       {
 | |
| 	typedef const _SpecializedCursor<_InIterT>& _CursorT;
 | |
| 	_CursorT __c = static_cast<_CursorT>(__pc);
 | |
| 	return true;
 | |
|       }
 | |
| 
 | |
|       void
 | |
|       _M_add_char(_CharT __c)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       _M_add_collating_element(const _StringT& __s)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       _M_add_equivalence_class(const _StringT& __s)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       _M_add_character_class(const _StringT& __s)
 | |
|       { }
 | |
| 
 | |
|       void
 | |
|       _M_make_range()
 | |
|       { }
 | |
| 
 | |
|       const _TraitsT& _M_traits;
 | |
|       bool            _M_is_non_matching;
 | |
|     };
 | |
| 
 | |
|   /// Identifies a state in the NFA.
 | |
|   typedef int _StateIdT;
 | |
| 
 | |
|   /// The special case in which a state identifier is not an index.
 | |
|   static const _StateIdT _S_invalid_state_id  = -1;
 | |
| 
 | |
| 
 | |
|   /**
 | |
|    * @brief struct _State
 | |
|    *
 | |
|    * An individual state in an NFA
 | |
|    *
 | |
|    * In this case a "state" is an entry in the NFA definition coupled
 | |
|    * with its outgoing transition(s).  All states have a single outgoing
 | |
|    * transition, except for accepting states (which have no outgoing
 | |
|    * transitions) and alt states, which have two outgoing transitions.
 | |
|    */
 | |
|   struct _State
 | |
|   {
 | |
|     typedef int  _OpcodeT;
 | |
| 
 | |
|     _OpcodeT     _M_opcode;    // type of outgoing transition
 | |
|     _StateIdT    _M_next;      // outgoing transition
 | |
|     _StateIdT    _M_alt;       // for _S_opcode_alternative
 | |
|     unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
 | |
|     _Tagger      _M_tagger;    // for _S_opcode_subexpr_*
 | |
|     _Matcher     _M_matches;   // for _S_opcode_match
 | |
| 
 | |
|     explicit _State(_OpcodeT __opcode)
 | |
|     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
 | |
|     { }
 | |
| 
 | |
|     _State(const _Matcher& __m)
 | |
|     : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
 | |
|     { }
 | |
| 
 | |
|     _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
 | |
|     : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
 | |
|       _M_tagger(__t)
 | |
|     { }
 | |
| 
 | |
|     _State(_StateIdT __next, _StateIdT __alt)
 | |
|     : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
 | |
|     { }
 | |
| 
 | |
| #ifdef _GLIBCXX_DEBUG
 | |
|     std::ostream&
 | |
|     _M_print(std::ostream& ostr) const;
 | |
| 
 | |
|     // Prints graphviz dot commands for state.
 | |
|     std::ostream&
 | |
|     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
 | |
| #endif
 | |
|   };
 | |
| 
 | |
|   
 | |
|   /// The Grep Matcher works on sets of states.  Here are sets of states.
 | |
|   typedef std::set<_StateIdT> _StateSet;
 | |
| 
 | |
|   /**
 | |
|    * @brief struct _Nfa
 | |
|    *
 | |
|    * A collection of all states making up an NFA.
 | |
|    *
 | |
|    * An NFA is a 4-tuple M = (K, S, s, F), where
 | |
|    *    K is a finite set of states,
 | |
|    *    S is the alphabet of the NFA,
 | |
|    *    s is the initial state,
 | |
|    *    F is a set of final (accepting) states.
 | |
|    *
 | |
|    * This NFA class is templated on S, a type that will hold values of the
 | |
|    * underlying alphabet (without regard to semantics of that alphabet).  The
 | |
|    * other elements of the tuple are generated during construction of the NFA
 | |
|    * and are available through accessor member functions.
 | |
|    */
 | |
|   class _Nfa
 | |
|   : public _Automaton, public std::vector<_State>
 | |
|   {
 | |
|   public:
 | |
|     typedef _State                              _StateT;
 | |
|     typedef unsigned int                        _SizeT;
 | |
|     typedef regex_constants::syntax_option_type _FlagT;
 | |
| 
 | |
|     _Nfa(_FlagT __f)
 | |
|     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
 | |
|     { }
 | |
| 
 | |
|     ~_Nfa()
 | |
|     { }
 | |
| 
 | |
|     _FlagT
 | |
|     _M_options() const
 | |
|     { return _M_flags; }
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_start() const
 | |
|     { return _M_start_state; }
 | |
| 
 | |
|     const _StateSet&
 | |
|     _M_final_states() const
 | |
|     { return _M_accepting_states; }
 | |
| 
 | |
|     _SizeT
 | |
|     _M_sub_count() const
 | |
|     { return _M_subexpr_count; }
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_insert_accept()
 | |
|     {
 | |
|       this->push_back(_StateT(_S_opcode_accept));
 | |
|       _M_accepting_states.insert(this->size()-1);
 | |
|       return this->size()-1;
 | |
|     }
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_insert_alt(_StateIdT __next, _StateIdT __alt)
 | |
|     {
 | |
|       this->push_back(_StateT(__next, __alt));
 | |
|       return this->size()-1;
 | |
|     }
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_insert_matcher(_Matcher __m)
 | |
|     {
 | |
|       this->push_back(_StateT(__m));
 | |
|       return this->size()-1;
 | |
|     }
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_insert_subexpr_begin(const _Tagger& __t)
 | |
|     {
 | |
|       this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
 | |
| 			      __t));
 | |
|       return this->size()-1;
 | |
|     }
 | |
| 
 | |
|     _StateIdT 
 | |
|     _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
 | |
|     {
 | |
|       this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
 | |
|       return this->size()-1;
 | |
|     }
 | |
| 
 | |
| #ifdef _GLIBCXX_DEBUG
 | |
|     std::ostream&
 | |
|     _M_dot(std::ostream& __ostr) const;
 | |
| #endif
 | |
| 
 | |
|   private:
 | |
|     _FlagT     _M_flags;
 | |
|     _StateIdT  _M_start_state;
 | |
|     _StateSet  _M_accepting_states;
 | |
|     _SizeT     _M_subexpr_count;
 | |
|   };
 | |
| 
 | |
|   /// Describes a sequence of one or more %_State, its current start
 | |
|   /// and end(s).  This structure contains fragments of an NFA during
 | |
|   /// construction.
 | |
|   class _StateSeq
 | |
|   {
 | |
|   public:
 | |
|     // Constructs a single-node sequence
 | |
|     _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
 | |
|     : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
 | |
|     { }
 | |
|     // Constructs a split sequence from two other sequencces
 | |
|     _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
 | |
|     : _M_nfa(__e1._M_nfa),
 | |
|       _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
 | |
|       _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
 | |
|     { }
 | |
| 
 | |
|     // Constructs a split sequence from a single sequence
 | |
|     _StateSeq(const _StateSeq& __e, _StateIdT __id)
 | |
|     : _M_nfa(__e._M_nfa),
 | |
|       _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
 | |
|       _M_end1(__id), _M_end2(__e._M_end1)
 | |
|     { }
 | |
| 
 | |
|     // Constructs a copy of a %_StateSeq
 | |
|     _StateSeq(const _StateSeq& __rhs)
 | |
|     : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
 | |
|       _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
 | |
|     { }
 | |
| 
 | |
| 
 | |
|     _StateSeq& operator=(const _StateSeq& __rhs);
 | |
| 
 | |
|     _StateIdT
 | |
|     _M_front() const
 | |
|     { return _M_start; }
 | |
| 
 | |
|     // Extends a sequence by one.
 | |
|     void
 | |
|     _M_push_back(_StateIdT __id);
 | |
| 
 | |
|     // Extends and maybe joins a sequence.
 | |
|     void
 | |
|     _M_append(_StateIdT __id);
 | |
| 
 | |
|     void
 | |
|     _M_append(_StateSeq& __rhs);
 | |
| 
 | |
|     // Clones an entire sequence.
 | |
|     _StateIdT
 | |
|     _M_clone();
 | |
| 
 | |
|   private:
 | |
|     _Nfa&     _M_nfa;
 | |
|     _StateIdT _M_start;
 | |
|     _StateIdT _M_end1;
 | |
|     _StateIdT _M_end2;
 | |
| 
 | |
|   };
 | |
| 
 | |
|  //@} regex-detail
 | |
| _GLIBCXX_END_NAMESPACE_VERSION
 | |
| } // namespace __detail
 | |
| } // namespace std
 | |
| 
 | |
| #include <bits/regex_nfa.tcc>
 | |
| 
 |