mirror of git://gcc.gnu.org/git/gcc.git
regex_automaton.h (_NFA<>::_M_insert_repeat): Add _S_opcode_repeat support to distingush a loop from _S_opcode_alternative.
2014-04-27 Tim Shen <timshen91@gmail.com> * include/bits/regex_automaton.h (_NFA<>::_M_insert_repeat): Add _S_opcode_repeat support to distingush a loop from _S_opcode_alternative. * include/bits/regex_automaton.tcc (_State_base::_M_print, _State_base::_M_dot, _NFA<>::_M_eliminate_dummy, _StateSeq<>::_M_clone): Likewise. * include/bits/regex_compiler.tcc (_Compiler<>::_M_quantifier): Likewise. * include/bits/regex_executor.tcc (_Executor<>::_M_dfs): Likewise. * include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma): Uglify local variable __i. * include/bits/regex_compiler.h (_BracketMatcher<>::_M_make_cache): Use size_t instead of int to compare with vector::size(). 2014-04-27 Tim Shen <timshen91@gmail.com> * include/bits/regex_executor.h: Add _M_rep_count to track how many times this repeat node are visited. * include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more, _Executor<>::_M_dfs): Use _M_rep_count to prevent entering infinite loop. 2014-04-27 Tim Shen <timshen91@gmail.com> * include/bits/regex.tcc (__regex_algo_impl<>): Remove _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT and use _GLIBCXX_REGEX_USE_THOMPSON_NFA instead. * include/bits/regex_automaton.h: Remove quantifier counting variable. * include/bits/regex_automaton.tcc (_State_base::_M_dot): Adjust debug NFA dump. From-SVN: r209844
This commit is contained in:
parent
f956adb961
commit
a670a9bb54
|
|
@ -1,3 +1,36 @@
|
||||||
|
2014-04-27 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
* include/bits/regex_automaton.h (_NFA<>::_M_insert_repeat):
|
||||||
|
Add _S_opcode_repeat support to distingush a loop from
|
||||||
|
_S_opcode_alternative.
|
||||||
|
* include/bits/regex_automaton.tcc (_State_base::_M_print,
|
||||||
|
_State_base::_M_dot, _NFA<>::_M_eliminate_dummy,
|
||||||
|
_StateSeq<>::_M_clone): Likewise.
|
||||||
|
* include/bits/regex_compiler.tcc (_Compiler<>::_M_quantifier):
|
||||||
|
Likewise.
|
||||||
|
* include/bits/regex_executor.tcc (_Executor<>::_M_dfs): Likewise.
|
||||||
|
* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
|
||||||
|
Uglify local variable __i.
|
||||||
|
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_make_cache):
|
||||||
|
Use size_t instead of int to compare with vector::size().
|
||||||
|
|
||||||
|
2014-04-27 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
* include/bits/regex_executor.h: Add _M_rep_count to track how
|
||||||
|
many times this repeat node are visited.
|
||||||
|
* include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more,
|
||||||
|
_Executor<>::_M_dfs): Use _M_rep_count to prevent entering
|
||||||
|
infinite loop.
|
||||||
|
|
||||||
|
2014-04-27 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
* include/bits/regex.tcc (__regex_algo_impl<>): Remove
|
||||||
|
_GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT and use
|
||||||
|
_GLIBCXX_REGEX_USE_THOMPSON_NFA instead.
|
||||||
|
* include/bits/regex_automaton.h: Remove quantifier counting variable.
|
||||||
|
* include/bits/regex_automaton.tcc (_State_base::_M_dot):
|
||||||
|
Adjust debug NFA dump.
|
||||||
|
|
||||||
2014-04-25 Lars Gullik Bjønnes <larsbj@gullik.org>
|
2014-04-25 Lars Gullik Bjønnes <larsbj@gullik.org>
|
||||||
|
|
||||||
PR libstdc++/60710
|
PR libstdc++/60710
|
||||||
|
|
|
||||||
|
|
@ -28,12 +28,12 @@
|
||||||
* Do not attempt to use it directly. @headername{regex}
|
* Do not attempt to use it directly. @headername{regex}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// See below __regex_algo_impl to get what this is talking about. The default
|
// A non-standard switch to let the user pick the matching algorithm.
|
||||||
// value 1 indicated a conservative optimization without giving up worst case
|
// If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
|
||||||
// performance.
|
// algorithm will be used. This algorithm is not enabled by default,
|
||||||
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
// and cannot be used if the regex contains back-references, but has better
|
||||||
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
|
// (polynomial instead of exponential) worst case performace.
|
||||||
#endif
|
// See __regex_algo_impl below.
|
||||||
|
|
||||||
namespace std _GLIBCXX_VISIBILITY(default)
|
namespace std _GLIBCXX_VISIBILITY(default)
|
||||||
{
|
{
|
||||||
|
|
@ -66,24 +66,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
for (auto& __it : __res)
|
for (auto& __it : __res)
|
||||||
__it.matched = false;
|
__it.matched = false;
|
||||||
|
|
||||||
// This function decide which executor to use under given circumstances.
|
// __policy is used by testsuites so that they can use Thompson NFA
|
||||||
// The _S_auto policy now is the following: if a NFA has no
|
// without defining a macro. Users should define
|
||||||
// back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
// _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
|
||||||
// quantifiers (*, +, ?), the BFS executor will be used, other wise
|
|
||||||
// DFS executor. This is because DFS executor has a exponential upper
|
|
||||||
// bound, but better best-case performace. Meanwhile, BFS executor can
|
|
||||||
// effectively prevent from exponential-long time matching (which must
|
|
||||||
// contains many quantifiers), but it's slower in average.
|
|
||||||
//
|
|
||||||
// For simple regex, BFS executor could be 2 or more times slower than
|
|
||||||
// DFS executor.
|
|
||||||
//
|
|
||||||
// Of course, BFS executor cannot handle back-references.
|
|
||||||
bool __ret;
|
bool __ret;
|
||||||
if (!__re._M_automaton->_M_has_backref
|
if (!__re._M_automaton->_M_has_backref
|
||||||
&& (__policy == _RegexExecutorPolicy::_S_alternate
|
#ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
|
||||||
|| __re._M_automaton->_M_quant_count
|
&& __policy == _RegexExecutorPolicy::_S_alternate
|
||||||
> _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
|
#endif
|
||||||
|
)
|
||||||
{
|
{
|
||||||
_Executor<_BiIter, _Alloc, _TraitsT, false>
|
_Executor<_BiIter, _Alloc, _TraitsT, false>
|
||||||
__executor(__s, __e, __m, __re, __flags);
|
__executor(__s, __e, __m, __re, __flags);
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
_S_opcode_unknown,
|
_S_opcode_unknown,
|
||||||
_S_opcode_alternative,
|
_S_opcode_alternative,
|
||||||
|
_S_opcode_repeat,
|
||||||
_S_opcode_backref,
|
_S_opcode_backref,
|
||||||
_S_opcode_line_begin_assertion,
|
_S_opcode_line_begin_assertion,
|
||||||
_S_opcode_line_end_assertion,
|
_S_opcode_line_end_assertion,
|
||||||
|
|
@ -74,9 +75,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
size_t _M_backref_index; // for _S_opcode_backref
|
size_t _M_backref_index; // for _S_opcode_backref
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
// for _S_opcode_alternative.
|
// for _S_opcode_alternative, _S_opcode_repeat and
|
||||||
_StateIdT _M_quant_index;
|
// _S_opcode_subexpr_lookahead
|
||||||
// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
|
|
||||||
_StateIdT _M_alt;
|
_StateIdT _M_alt;
|
||||||
// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
|
// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
|
||||||
// quantifiers (ungreedy if set true)
|
// quantifiers (ungreedy if set true)
|
||||||
|
|
@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
explicit
|
explicit
|
||||||
_NFA_base(_FlagT __f)
|
_NFA_base(_FlagT __f)
|
||||||
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
|
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
|
||||||
_M_quant_count(0), _M_has_backref(false)
|
_M_has_backref(false)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
_NFA_base(_NFA_base&&) = default;
|
_NFA_base(_NFA_base&&) = default;
|
||||||
|
|
@ -145,7 +145,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_FlagT _M_flags;
|
_FlagT _M_flags;
|
||||||
_StateIdT _M_start_state;
|
_StateIdT _M_start_state;
|
||||||
_SizeT _M_subexpr_count;
|
_SizeT _M_subexpr_count;
|
||||||
_SizeT _M_quant_count;
|
|
||||||
bool _M_has_backref;
|
bool _M_has_backref;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -175,7 +174,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_StateT __tmp(_S_opcode_alternative);
|
_StateT __tmp(_S_opcode_alternative);
|
||||||
// It labels every quantifier to make greedy comparison easier in BFS
|
// It labels every quantifier to make greedy comparison easier in BFS
|
||||||
// approach.
|
// approach.
|
||||||
__tmp._M_quant_index = this->_M_quant_count++;
|
__tmp._M_next = __next;
|
||||||
|
__tmp._M_alt = __alt;
|
||||||
|
return _M_insert_state(std::move(__tmp));
|
||||||
|
}
|
||||||
|
|
||||||
|
_StateIdT
|
||||||
|
_M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
|
||||||
|
{
|
||||||
|
_StateT __tmp(_S_opcode_repeat);
|
||||||
|
// It labels every quantifier to make greedy comparison easier in BFS
|
||||||
|
// approach.
|
||||||
__tmp._M_next = __next;
|
__tmp._M_next = __next;
|
||||||
__tmp._M_alt = __alt;
|
__tmp._M_alt = __alt;
|
||||||
__tmp._M_neg = __neg;
|
__tmp._M_neg = __neg;
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
switch (_M_opcode)
|
switch (_M_opcode)
|
||||||
{
|
{
|
||||||
case _S_opcode_alternative:
|
case _S_opcode_alternative:
|
||||||
|
case _S_opcode_repeat:
|
||||||
ostr << "alt next=" << _M_next << " alt=" << _M_alt;
|
ostr << "alt next=" << _M_next << " alt=" << _M_alt;
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
|
|
@ -72,11 +73,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
switch (_M_opcode)
|
switch (_M_opcode)
|
||||||
{
|
{
|
||||||
case _S_opcode_alternative:
|
case _S_opcode_alternative:
|
||||||
|
case _S_opcode_repeat:
|
||||||
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
|
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
|
||||||
<< __id << " -> " << _M_next
|
<< __id << " -> " << _M_next
|
||||||
<< " [label=\"epsilon\", tailport=\"s\"];\n"
|
<< " [label=\"next\", tailport=\"s\"];\n"
|
||||||
<< __id << " -> " << _M_alt
|
<< __id << " -> " << _M_alt
|
||||||
<< " [label=\"epsilon\", tailport=\"n\"];\n";
|
<< " [label=\"alt\", tailport=\"n\"];\n";
|
||||||
break;
|
break;
|
||||||
case _S_opcode_backref:
|
case _S_opcode_backref:
|
||||||
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
|
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
|
||||||
|
|
@ -174,6 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
== _S_opcode_dummy)
|
== _S_opcode_dummy)
|
||||||
__it._M_next = (*this)[__it._M_next]._M_next;
|
__it._M_next = (*this)[__it._M_next]._M_next;
|
||||||
if (__it._M_opcode == _S_opcode_alternative
|
if (__it._M_opcode == _S_opcode_alternative
|
||||||
|
|| __it._M_opcode == _S_opcode_repeat
|
||||||
|| __it._M_opcode == _S_opcode_subexpr_lookahead)
|
|| __it._M_opcode == _S_opcode_subexpr_lookahead)
|
||||||
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
|
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
|
||||||
== _S_opcode_dummy)
|
== _S_opcode_dummy)
|
||||||
|
|
@ -198,6 +201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
auto __id = _M_nfa._M_insert_state(__dup);
|
auto __id = _M_nfa._M_insert_state(__dup);
|
||||||
__m[__u] = __id;
|
__m[__u] = __id;
|
||||||
if (__dup._M_opcode == _S_opcode_alternative
|
if (__dup._M_opcode == _S_opcode_alternative
|
||||||
|
|| __dup._M_opcode == _S_opcode_repeat
|
||||||
|| __dup._M_opcode == _S_opcode_subexpr_lookahead)
|
|| __dup._M_opcode == _S_opcode_subexpr_lookahead)
|
||||||
if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
|
if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
|
||||||
__stack.push(__dup._M_alt);
|
__stack.push(__dup._M_alt);
|
||||||
|
|
@ -217,6 +221,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__ref._M_next = __m[__ref._M_next];
|
__ref._M_next = __m[__ref._M_next];
|
||||||
}
|
}
|
||||||
if (__ref._M_opcode == _S_opcode_alternative
|
if (__ref._M_opcode == _S_opcode_alternative
|
||||||
|
|| __ref._M_opcode == _S_opcode_repeat
|
||||||
|| __ref._M_opcode == _S_opcode_subexpr_lookahead)
|
|| __ref._M_opcode == _S_opcode_subexpr_lookahead)
|
||||||
if (__ref._M_alt != _S_invalid_state_id)
|
if (__ref._M_alt != _S_invalid_state_id)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -421,7 +421,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
void
|
void
|
||||||
_M_make_cache(true_type)
|
_M_make_cache(true_type)
|
||||||
{
|
{
|
||||||
for (int __i = 0; __i < _M_cache.size(); __i++)
|
for (size_t __i = 0; __i < _M_cache.size(); __i++)
|
||||||
_M_cache[static_cast<_UnsignedCharT>(__i)] =
|
_M_cache[static_cast<_UnsignedCharT>(__i)] =
|
||||||
_M_apply(__i, false_type());
|
_M_apply(__i, false_type());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -188,8 +188,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
__init();
|
__init();
|
||||||
auto __e = _M_pop();
|
auto __e = _M_pop();
|
||||||
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
|
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id,
|
||||||
__e._M_start, __neg));
|
__e._M_start, __neg));
|
||||||
__e._M_append(__r);
|
__e._M_append(__r);
|
||||||
_M_stack.push(__r);
|
_M_stack.push(__r);
|
||||||
}
|
}
|
||||||
|
|
@ -197,8 +197,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
__init();
|
__init();
|
||||||
auto __e = _M_pop();
|
auto __e = _M_pop();
|
||||||
__e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start,
|
__e._M_append(_M_nfa._M_insert_repeat(_S_invalid_state_id,
|
||||||
__neg));
|
__e._M_start, __neg));
|
||||||
_M_stack.push(__e);
|
_M_stack.push(__e);
|
||||||
}
|
}
|
||||||
else if (_M_match_token(_ScannerT::_S_token_opt))
|
else if (_M_match_token(_ScannerT::_S_token_opt))
|
||||||
|
|
@ -206,8 +206,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__init();
|
__init();
|
||||||
auto __e = _M_pop();
|
auto __e = _M_pop();
|
||||||
auto __end = _M_nfa._M_insert_dummy();
|
auto __end = _M_nfa._M_insert_dummy();
|
||||||
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
|
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id,
|
||||||
__e._M_start, __neg));
|
__e._M_start, __neg));
|
||||||
__e._M_append(__end);
|
__e._M_append(__end);
|
||||||
__r._M_append(__end);
|
__r._M_append(__end);
|
||||||
_M_stack.push(__r);
|
_M_stack.push(__r);
|
||||||
|
|
@ -244,8 +244,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
auto __tmp = __r._M_clone();
|
auto __tmp = __r._M_clone();
|
||||||
_StateSeqT __s(_M_nfa,
|
_StateSeqT __s(_M_nfa,
|
||||||
_M_nfa._M_insert_alt(_S_invalid_state_id,
|
_M_nfa._M_insert_repeat(_S_invalid_state_id,
|
||||||
__tmp._M_start, __neg));
|
__tmp._M_start, __neg));
|
||||||
__tmp._M_append(__s);
|
__tmp._M_append(__s);
|
||||||
__e._M_append(__s);
|
__e._M_append(__s);
|
||||||
}
|
}
|
||||||
|
|
@ -261,8 +261,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
for (long __i = 0; __i < __n; ++__i)
|
for (long __i = 0; __i < __n; ++__i)
|
||||||
{
|
{
|
||||||
auto __tmp = __r._M_clone();
|
auto __tmp = __r._M_clone();
|
||||||
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
|
auto __alt = _M_nfa._M_insert_repeat(__tmp._M_start,
|
||||||
__end, __neg);
|
__end, __neg);
|
||||||
__stack.push(__alt);
|
__stack.push(__alt);
|
||||||
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
|
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_M_results(__results),
|
_M_results(__results),
|
||||||
_M_match_queue(__dfs_mode ? nullptr
|
_M_match_queue(__dfs_mode ? nullptr
|
||||||
: new vector<pair<_StateIdT, _ResultsVec>>()),
|
: new vector<pair<_StateIdT, _ResultsVec>>()),
|
||||||
|
_M_rep_count(_M_nfa.size()),
|
||||||
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
|
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
|
||||||
_M_flags((__flags & regex_constants::match_prev_avail)
|
_M_flags((__flags & regex_constants::match_prev_avail)
|
||||||
? (__flags
|
? (__flags
|
||||||
|
|
@ -102,6 +103,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_M_search();
|
_M_search();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
template<bool __match_mode>
|
||||||
|
void
|
||||||
|
_M_rep_once_more(_StateIdT);
|
||||||
|
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
void
|
void
|
||||||
_M_dfs(_StateIdT __start);
|
_M_dfs(_StateIdT __start);
|
||||||
|
|
@ -149,9 +154,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_ResultsVec& _M_results;
|
_ResultsVec& _M_results;
|
||||||
// Used in BFS, saving states that need to be considered for the next
|
// Used in BFS, saving states that need to be considered for the next
|
||||||
// character.
|
// character.
|
||||||
std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
|
unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
|
||||||
// Used in BFS, indicating that which state is already visited.
|
// Used in BFS, indicating that which state is already visited.
|
||||||
std::unique_ptr<vector<bool>> _M_visited;
|
vector<pair<_BiIter, int>> _M_rep_count;
|
||||||
|
unique_ptr<vector<bool>> _M_visited;
|
||||||
_FlagT _M_flags;
|
_FlagT _M_flags;
|
||||||
// To record current solution.
|
// To record current solution.
|
||||||
_StateIdT _M_start_state;
|
_StateIdT _M_start_state;
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Use a function vector to dispatch, instead of using switch-case.
|
// __rep_count records how many times (__rep_count.second)
|
||||||
|
// this node is visited under certain input iterator
|
||||||
|
// (__rep_count.first). This prevent the executor from entering
|
||||||
|
// infinite loop by refusing to continue when it's already been
|
||||||
|
// visited more than twice. It's `twice` instead of `once` because
|
||||||
|
// we need to spare one more time for potential group capture.
|
||||||
|
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||||
|
bool __dfs_mode>
|
||||||
|
template<bool __match_mode>
|
||||||
|
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||||
|
_M_rep_once_more(_StateIdT __i)
|
||||||
|
{
|
||||||
|
const auto& __state = _M_nfa[__i];
|
||||||
|
auto& __rep_count = _M_rep_count[__i];
|
||||||
|
if (__rep_count.second == 0 || __rep_count.first != _M_current)
|
||||||
|
{
|
||||||
|
auto __back = __rep_count;
|
||||||
|
__rep_count.first = _M_current;
|
||||||
|
__rep_count.second = 1;
|
||||||
|
_M_dfs<__match_mode>(__state._M_alt);
|
||||||
|
__rep_count = __back;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (__rep_count.second < 2)
|
||||||
|
{
|
||||||
|
__rep_count.second++;
|
||||||
|
_M_dfs<__match_mode>(__state._M_alt);
|
||||||
|
__rep_count.second--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||||
bool __dfs_mode>
|
bool __dfs_mode>
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
|
|
@ -184,69 +216,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
// of this quantifier". Executing _M_next first or _M_alt first don't
|
// of this quantifier". Executing _M_next first or _M_alt first don't
|
||||||
// mean the same thing, and we need to choose the correct order under
|
// mean the same thing, and we need to choose the correct order under
|
||||||
// given greedy mode.
|
// given greedy mode.
|
||||||
case _S_opcode_alternative:
|
case _S_opcode_repeat:
|
||||||
// Greedy.
|
{
|
||||||
if (!__state._M_neg)
|
// Greedy.
|
||||||
{
|
if (!__state._M_neg)
|
||||||
// "Once more" is preferred in greedy mode.
|
{
|
||||||
_M_dfs<__match_mode>(__state._M_alt);
|
_M_rep_once_more<__match_mode>(__i);
|
||||||
// If it's DFS executor and already accepted, we're done.
|
// If it's DFS executor and already accepted, we're done.
|
||||||
if (!__dfs_mode || !_M_has_sol)
|
if (!__dfs_mode || !_M_has_sol)
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
|
||||||
}
|
|
||||||
else // Non-greedy mode
|
|
||||||
{
|
|
||||||
if (__dfs_mode)
|
|
||||||
{
|
|
||||||
// vice-versa.
|
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
if (!_M_has_sol)
|
}
|
||||||
_M_dfs<__match_mode>(__state._M_alt);
|
else // Non-greedy mode
|
||||||
}
|
{
|
||||||
else
|
if (__dfs_mode)
|
||||||
{
|
{
|
||||||
// DON'T attempt anything, because there's already another
|
// vice-versa.
|
||||||
// state with higher priority accepted. This state cannot be
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
// better by attempting its next node.
|
if (!_M_has_sol)
|
||||||
if (!_M_has_sol)
|
_M_rep_once_more<__match_mode>(__i);
|
||||||
{
|
}
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
else
|
||||||
// DON'T attempt anything if it's already accepted. An
|
{
|
||||||
// accepted state *must* be better than a solution that
|
// DON'T attempt anything, because there's already another
|
||||||
// matches a non-greedy quantifier one more time.
|
// state with higher priority accepted. This state cannot be
|
||||||
if (!_M_has_sol)
|
// better by attempting its next node.
|
||||||
_M_dfs<__match_mode>(__state._M_alt);
|
if (!_M_has_sol)
|
||||||
}
|
{
|
||||||
}
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
|
// DON'T attempt anything if it's already accepted. An
|
||||||
|
// accepted state *must* be better than a solution that
|
||||||
|
// matches a non-greedy quantifier one more time.
|
||||||
|
if (!_M_has_sol)
|
||||||
|
_M_rep_once_more<__match_mode>(__i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
// If there's nothing changed since last visit, do NOT continue.
|
{
|
||||||
// This prevents the executor from get into infinite loop when using
|
auto& __res = _M_cur_results[__state._M_subexpr];
|
||||||
// "()*" to match "".
|
auto __back = __res.first;
|
||||||
if (!_M_cur_results[__state._M_subexpr].matched
|
__res.first = _M_current;
|
||||||
|| _M_cur_results[__state._M_subexpr].first != _M_current)
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
{
|
__res.first = __back;
|
||||||
auto& __res = _M_cur_results[__state._M_subexpr];
|
}
|
||||||
auto __back = __res.first;
|
|
||||||
__res.first = _M_current;
|
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
|
||||||
__res.first = __back;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
if (_M_cur_results[__state._M_subexpr].second != _M_current
|
{
|
||||||
|| _M_cur_results[__state._M_subexpr].matched != true)
|
auto& __res = _M_cur_results[__state._M_subexpr];
|
||||||
{
|
auto __back = __res;
|
||||||
auto& __res = _M_cur_results[__state._M_subexpr];
|
__res.second = _M_current;
|
||||||
auto __back = __res;
|
__res.matched = true;
|
||||||
__res.second = _M_current;
|
|
||||||
__res.matched = true;
|
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
|
||||||
__res = __back;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
_M_dfs<__match_mode>(__state._M_next);
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
|
__res = __back;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_line_begin_assertion:
|
case _S_opcode_line_begin_assertion:
|
||||||
if (_M_at_begin())
|
if (_M_at_begin())
|
||||||
|
|
@ -339,6 +363,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case _S_opcode_alternative:
|
||||||
|
_M_dfs<__match_mode>(__state._M_alt);
|
||||||
|
if (!__dfs_mode || !_M_has_sol)
|
||||||
|
_M_dfs<__match_mode>(__state._M_next);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
_GLIBCXX_DEBUG_ASSERT(false);
|
_GLIBCXX_DEBUG_ASSERT(false);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -335,7 +335,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
else if (__c == 'x' || __c == 'u')
|
else if (__c == 'x' || __c == 'u')
|
||||||
{
|
{
|
||||||
_M_value.erase();
|
_M_value.erase();
|
||||||
for (int i = 0; i < (__c == 'x' ? 2 : 4); i++)
|
for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
|
||||||
{
|
{
|
||||||
if (_M_current == _M_end
|
if (_M_current == _M_end
|
||||||
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
|
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ test01()
|
||||||
const char s[] = "";
|
const char s[] = "";
|
||||||
VERIFY( regex_match_debug(s, m, re) );
|
VERIFY( regex_match_debug(s, m, re) );
|
||||||
}
|
}
|
||||||
|
VERIFY(regex_match_debug("", regex("(?:)*")));
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue