algobase.h: Uglify internal identifiers.

2009-09-16  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/algobase.h: Uglify internal identifiers.
        * include/parallel/algo.h: Likewise.
        * include/parallel/algorithmfwd.h: Likewise.
        * include/parallel/balanced_quicksort.h: Likewise.
        * include/parallel/base.h: Likewise.
        * include/parallel/checkers.h: Likewise.
        * include/parallel/compatibility.h: Likewise.
        * include/parallel/compiletime_settings.h: Likewise.
        * include/parallel/equally_split.h: Likewise.
        * include/parallel/features.h: Likewise.
        * include/parallel/find.h: Likewise.
        * include/parallel/find_selectors.h: Likewise.
        * include/parallel/for_each.h: Likewise.
        * include/parallel/for_each_selectors.h: Likewise.
        * include/parallel/iterator.h: Likewise.
        * include/parallel/list_partition.h: Likewise.
        * include/parallel/losertree.h: Likewise.
        * include/parallel/merge.h: Likewise.
        * include/parallel/multiseq_selection.h: Likewise.
        * include/parallel/multiway_merge.h: Likewise.
        * include/parallel/multiway_mergesort.h: Likewise.
        * include/parallel/numeric: Likewise.
        * include/parallel/numericfwd.h: Likewise.
        * include/parallel/omp_loop.h: Likewise.
        * include/parallel/omp_loop_static.h: Likewise.
        * include/parallel/par_loop.h: Likewise.
        * include/parallel/partial_sum.h: Likewise.
        * include/parallel/partition.h: Likewise.
        * include/parallel/queue.h: Likewise.
        * include/parallel/quicksort.h: Likewise.
        * include/parallel/random_number.h: Likewise.
        * include/parallel/random_shuffle.h: Likewise.
        * include/parallel/search.h: Likewise.
        * include/parallel/set_operations.h: Likewise.
        * include/parallel/settings.h: Likewise.
        * include/parallel/sort.h: Likewise.
        * include/parallel/tags.h: Likewise.
        * include/parallel/types.h: Likewise.
        * include/parallel/unique_copy.h: Likewise.
        * include/parallel/workstealing.h: Likewise.

From-SVN: r151741
This commit is contained in:
Johannes Singler 2009-09-16 09:47:25 +00:00 committed by Johannes Singler
parent 4075e7e8dc
commit 1acba85b37
41 changed files with 7013 additions and 6967 deletions

View File

@@ -1,3 +1,49 @@
2009-09-16 Johannes Singler <singler@ira.uka.de>
* include/parallel/algobase.h: Uglify internal identifiers.
* include/parallel/algo.h: Likewise.
* include/parallel/algorithm: Likewise.
* include/parallel/algorithmfwd.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/basic_iterator.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/compatibility.h: Likewise.
* include/parallel/compiletime_settings.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/features.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/find_selectors.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/for_each_selectors.h: Likewise.
* include/parallel/iterator.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/multiway_merge.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/numericfwd.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/parallel.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/random_number.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/settings.h: Likewise.
* include/parallel/sort.h: Likewise.
* include/parallel/tags.h: Likewise.
* include/parallel/types.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/workstealing.h: Likewise.
2009-09-14 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/41037

File diff suppressed because it is too large Load Diff

View File

@@ -50,230 +50,230 @@ namespace __parallel
// NB: equal and lexicographical_compare require mismatch.
// Sequential fallback
template<typename InputIterator1, typename InputIterator2>
inline pair<InputIterator1, InputIterator2>
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
template<typename _IIter1, typename _IIter2>
inline pair<_IIter1, _IIter2>
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2); }
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2); }
// Sequential fallback
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
inline pair<InputIterator1, InputIterator2>
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
Predicate pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline pair<_IIter1, _IIter2>
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
_Predicate __pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred); }
// Sequential fallback for input iterator case
template<typename InputIterator1, typename InputIterator2,
typename Predicate, typename IteratorTag1, typename IteratorTag2>
inline pair<InputIterator1, InputIterator2>
mismatch_switch(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, Predicate pred, IteratorTag1,
IteratorTag2)
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
template<typename _IIter1, typename _IIter2,
typename _Predicate, typename _IteratorTag1, typename _IteratorTag2>
inline pair<_IIter1, _IIter2>
__mismatch_switch(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _Predicate __pred, _IteratorTag1,
_IteratorTag2)
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred); }
// Parallel mismatch for random access iterators
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Predicate>
pair<RandomAccessIterator1, RandomAccessIterator2>
mismatch_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Predicate pred,
template<typename _RAIter1, typename _RAIter2,
typename _Predicate>
pair<_RAIter1, _RAIter2>
__mismatch_switch(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Predicate __pred,
random_access_iterator_tag, random_access_iterator_tag)
{
if (_GLIBCXX_PARALLEL_CONDITION(true))
{
RandomAccessIterator1 res =
__gnu_parallel::find_template(begin1, end1, begin2, pred,
_RAIter1 __res =
__gnu_parallel::__find_template(__begin1, __end1, __begin2, __pred,
__gnu_parallel::
mismatch_selector()).first;
return make_pair(res , begin2 + (res - begin1));
__mismatch_selector()).first;
return make_pair(__res , __begin2 + (__res - __begin1));
}
else
return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred);
return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred);
}
// Public interface
template<typename InputIterator1, typename InputIterator2>
inline pair<InputIterator1, InputIterator2>
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
template<typename _IIter1, typename _IIter2>
inline pair<_IIter1, _IIter2>
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2)
{
typedef std::iterator_traits<InputIterator1> iterator1_traits;
typedef std::iterator_traits<InputIterator2> iterator2_traits;
typedef typename iterator1_traits::value_type value1_type;
typedef typename iterator2_traits::value_type value2_type;
typedef typename iterator1_traits::iterator_category iterator1_category;
typedef typename iterator2_traits::iterator_category iterator2_category;
typedef std::iterator_traits<_IIter1> iterator1_traits;
typedef std::iterator_traits<_IIter2> iterator2_traits;
typedef typename iterator1_traits::value_type _ValueType1;
typedef typename iterator2_traits::value_type _ValueType2;
typedef typename iterator1_traits::iterator_category _IteratorCategory1;
typedef typename iterator2_traits::iterator_category _IteratorCategory2;
typedef __gnu_parallel::equal_to<value1_type, value2_type> equal_to_type;
typedef __gnu_parallel::equal_to<_ValueType1, _ValueType2> equal_to_type;
return mismatch_switch(begin1, end1, begin2, equal_to_type(),
iterator1_category(), iterator2_category());
return __mismatch_switch(__begin1, __end1, __begin2, equal_to_type(),
_IteratorCategory1(), _IteratorCategory2());
}
// Public interface
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
inline pair<InputIterator1, InputIterator2>
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
Predicate pred)
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline pair<_IIter1, _IIter2>
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
_Predicate __pred)
{
typedef std::iterator_traits<InputIterator1> iterator1_traits;
typedef std::iterator_traits<InputIterator2> iterator2_traits;
typedef typename iterator1_traits::iterator_category iterator1_category;
typedef typename iterator2_traits::iterator_category iterator2_category;
typedef std::iterator_traits<_IIter1> iterator1_traits;
typedef std::iterator_traits<_IIter2> iterator2_traits;
typedef typename iterator1_traits::iterator_category _IteratorCategory1;
typedef typename iterator2_traits::iterator_category _IteratorCategory2;
return mismatch_switch(begin1, end1, begin2, pred, iterator1_category(),
iterator2_category());
return __mismatch_switch(__begin1, __end1, __begin2, __pred, _IteratorCategory1(),
_IteratorCategory2());
}
// Sequential fallback
template<typename InputIterator1, typename InputIterator2>
template<typename _IIter1, typename _IIter2>
inline bool
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::equal(begin1, end1, begin2); }
{ return _GLIBCXX_STD_P::equal(__begin1, __end1, __begin2); }
// Sequential fallback
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline bool
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
Predicate pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::equal(begin1, end1, begin2, pred); }
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
_Predicate __pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::equal(__begin1, __end1, __begin2, __pred); }
// Public interface
template<typename InputIterator1, typename InputIterator2>
template<typename _IIter1, typename _IIter2>
inline bool
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
{ return mismatch(begin1, end1, begin2).first == end1; }
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2)
{ return mismatch(__begin1, __end1, __begin2).first == __end1; }
// Public interface
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline bool
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
Predicate pred)
{ return mismatch(begin1, end1, begin2, pred).first == end1; }
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
_Predicate __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred).first == __end1; }
// Sequential fallback
template<typename InputIterator1, typename InputIterator2>
template<typename _IIter1, typename _IIter2>
inline bool
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, InputIterator2 end2,
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _IIter2 __end2,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
begin2, end2); }
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
__begin2, __end2); }
// Sequential fallback
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline bool
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, InputIterator2 end2,
Predicate pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
begin2, end2, pred); }
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _IIter2 __end2,
_Predicate __pred, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
__begin2, __end2, __pred); }
// Sequential fallback for input iterator case
template<typename InputIterator1, typename InputIterator2,
typename Predicate, typename IteratorTag1, typename IteratorTag2>
template<typename _IIter1, typename _IIter2,
typename _Predicate, typename _IteratorTag1, typename _IteratorTag2>
inline bool
lexicographical_compare_switch(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, InputIterator2 end2,
Predicate pred, IteratorTag1, IteratorTag2)
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
begin2, end2, pred); }
__lexicographical_compare_switch(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _IIter2 __end2,
_Predicate __pred, _IteratorTag1, _IteratorTag2)
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
__begin2, __end2, __pred); }
// Parallel lexicographical_compare for random access iterators
// Limitation: Both valuetypes must be the same
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Predicate>
template<typename _RAIter1, typename _RAIter2,
typename _Predicate>
bool
lexicographical_compare_switch(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2,
RandomAccessIterator2 end2, Predicate pred,
__lexicographical_compare_switch(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2,
_RAIter2 __end2, _Predicate __pred,
random_access_iterator_tag,
random_access_iterator_tag)
{
if (_GLIBCXX_PARALLEL_CONDITION(true))
{
typedef iterator_traits<RandomAccessIterator1> traits1_type;
typedef typename traits1_type::value_type value1_type;
typedef iterator_traits<_RAIter1> _TraitsType1;
typedef typename _TraitsType1::value_type _ValueType1;
typedef iterator_traits<RandomAccessIterator2> traits2_type;
typedef typename traits2_type::value_type value2_type;
typedef iterator_traits<_RAIter2> _TraitsType2;
typedef typename _TraitsType2::value_type _ValueType2;
typedef __gnu_parallel::equal_from_less<Predicate, value1_type,
value2_type> equal_type;
typedef __gnu_parallel::_EqualFromLess<_Predicate, _ValueType1,
_ValueType2> _EqualFromLessCompare;
// Longer sequence in first place.
if ((end1 - begin1) < (end2 - begin2))
if ((__end1 - __begin1) < (__end2 - __begin2))
{
typedef pair<RandomAccessIterator1, RandomAccessIterator2>
pair_type;
pair_type mm = mismatch_switch(begin1, end1, begin2,
equal_type(pred),
typedef pair<_RAIter1, _RAIter2>
_SpotType;
_SpotType __mm = __mismatch_switch(__begin1, __end1, __begin2,
_EqualFromLessCompare(__pred),
random_access_iterator_tag(),
random_access_iterator_tag());
return (mm.first == end1) || bool(pred(*mm.first, *mm.second));
return (__mm.first == __end1) || bool(__pred(*__mm.first, *__mm.second));
}
else
{
typedef pair<RandomAccessIterator2, RandomAccessIterator1>
pair_type;
pair_type mm = mismatch_switch(begin2, end2, begin1,
equal_type(pred),
typedef pair<_RAIter2, _RAIter1>
_SpotType;
_SpotType __mm = __mismatch_switch(__begin2, __end2, __begin1,
_EqualFromLessCompare(__pred),
random_access_iterator_tag(),
random_access_iterator_tag());
return (mm.first != end2) && bool(pred(*mm.second, *mm.first));
return (__mm.first != __end2) && bool(__pred(*__mm.second, *__mm.first));
}
}
else
return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
begin2, end2, pred);
return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
__begin2, __end2, __pred);
}
// Public interface
template<typename InputIterator1, typename InputIterator2>
template<typename _IIter1, typename _IIter2>
inline bool
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, InputIterator2 end2)
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _IIter2 __end2)
{
typedef iterator_traits<InputIterator1> traits1_type;
typedef typename traits1_type::value_type value1_type;
typedef typename traits1_type::iterator_category iterator1_category;
typedef iterator_traits<_IIter1> _TraitsType1;
typedef typename _TraitsType1::value_type _ValueType1;
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
typedef iterator_traits<InputIterator2> traits2_type;
typedef typename traits2_type::value_type value2_type;
typedef typename traits2_type::iterator_category iterator2_category;
typedef __gnu_parallel::less<value1_type, value2_type> less_type;
typedef iterator_traits<_IIter2> _TraitsType2;
typedef typename _TraitsType2::value_type _ValueType2;
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
typedef __gnu_parallel::_Less<_ValueType1, _ValueType2> _LessType;
return lexicographical_compare_switch(begin1, end1, begin2, end2,
less_type(), iterator1_category(),
iterator2_category());
return __lexicographical_compare_switch(__begin1, __end1, __begin2, __end2,
_LessType(), _IteratorCategory1(),
_IteratorCategory2());
}
// Public interface
template<typename InputIterator1, typename InputIterator2,
typename Predicate>
template<typename _IIter1, typename _IIter2,
typename _Predicate>
inline bool
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
InputIterator2 begin2, InputIterator2 end2,
Predicate pred)
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
_IIter2 __begin2, _IIter2 __end2,
_Predicate __pred)
{
typedef iterator_traits<InputIterator1> traits1_type;
typedef typename traits1_type::iterator_category iterator1_category;
typedef iterator_traits<_IIter1> _TraitsType1;
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
typedef iterator_traits<InputIterator2> traits2_type;
typedef typename traits2_type::iterator_category iterator2_category;
typedef iterator_traits<_IIter2> _TraitsType2;
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
return lexicographical_compare_switch(begin1, end1, begin2, end2, pred,
iterator1_category(),
iterator2_category());
return __lexicographical_compare_switch(__begin1, __end1, __begin2, __end2, __pred,
_IteratorCategory1(),
_IteratorCategory2());
}
} // end namespace
} // end namespace

View File

@@ -48,11 +48,11 @@ namespace __parallel
template<typename _FIter, typename _IterTag>
_FIter
adjacent_find_switch(_FIter, _FIter, _IterTag);
__adjacent_find_switch(_FIter, _FIter, _IterTag);
template<typename _RAIter>
_RAIter
adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag);
__adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag);
template<typename _FIter, typename _BiPredicate>
@@ -66,11 +66,11 @@ namespace __parallel
template<typename _FIter, typename _BiPredicate, typename _IterTag>
_FIter
adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag);
__adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag);
template<typename _RAIter, typename _BiPredicate>
_RAIter
adjacent_find_switch(_RAIter, _RAIter, _BiPredicate,
__adjacent_find_switch(_RAIter, _RAIter, _BiPredicate,
random_access_iterator_tag);
@@ -88,12 +88,12 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _IterTag>
typename iterator_traits<_IIter>::difference_type
count_switch(_IIter, _IIter, const _Tp&, _IterTag);
__count_switch(_IIter, _IIter, const _Tp&, _IterTag);
template<typename _RAIter, typename _Tp>
typename iterator_traits<_RAIter>::difference_type
count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
@@ -111,12 +111,12 @@ namespace __parallel
template<typename _IIter, typename _Predicate, typename _IterTag>
typename iterator_traits<_IIter>::difference_type
count_if_switch(_IIter, _IIter, _Predicate, _IterTag);
__count_if_switch(_IIter, _IIter, _Predicate, _IterTag);
template<typename _RAIter, typename _Predicate>
typename iterator_traits<_RAIter>::difference_type
count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
// algobase.h
@@ -124,18 +124,18 @@ namespace __parallel
bool
equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename Predicate>
template<typename _IIter1, typename _IIter2, typename _Predicate>
bool
equal(_IIter1, _IIter1, _IIter2, Predicate,
equal(_IIter1, _IIter1, _IIter2, _Predicate,
__gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2>
bool
equal(_IIter1, _IIter1, _IIter2);
template<typename _IIter1, typename _IIter2, typename Predicate>
template<typename _IIter1, typename _IIter2, typename _Predicate>
bool
equal(_IIter1, _IIter1, _IIter2, Predicate);
equal(_IIter1, _IIter1, _IIter2, _Predicate);
template<typename _IIter, typename _Tp>
_IIter
@@ -143,15 +143,15 @@ namespace __parallel
template<typename _IIter, typename _Tp>
_IIter
find(_IIter, _IIter, const _Tp& val);
find(_IIter, _IIter, const _Tp& __val);
template<typename _IIter, typename _Tp, typename _IterTag>
_IIter
find_switch(_IIter, _IIter, const _Tp&, _IterTag);
__find_switch(_IIter, _IIter, const _Tp&, _IterTag);
template<typename _RAIter, typename _Tp>
_RAIter
find_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag);
__find_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag);
template<typename _IIter, typename _Predicate>
_IIter
@@ -163,11 +163,11 @@ namespace __parallel
template<typename _IIter, typename _Predicate, typename _IterTag>
_IIter
find_if_switch(_IIter, _IIter, _Predicate, _IterTag);
__find_if_switch(_IIter, _IIter, _Predicate, _IterTag);
template<typename _RAIter, typename _Predicate>
_RAIter
find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
__find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
template<typename _IIter, typename _FIter>
_IIter
@@ -190,18 +190,18 @@ namespace __parallel
template<typename _IIter, typename _FIter,
typename _IterTag1, typename _IterTag2>
_IIter
find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2);
__find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2);
template<typename _RAIter, typename _FIter, typename _BiPredicate,
typename _IterTag>
_RAIter
find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate,
__find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate,
random_access_iterator_tag, _IterTag);
template<typename _IIter, typename _FIter, typename _BiPredicate,
typename _IterTag1, typename _IterTag2>
_IIter
find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate,
__find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate,
_IterTag1, _IterTag2);
@@ -219,12 +219,12 @@ namespace __parallel
template<typename _IIter, typename _Function, typename _IterTag>
_Function
for_each_switch(_IIter, _IIter, _Function, _IterTag);
__for_each_switch(_IIter, _IIter, _Function, _IterTag);
template<typename _RAIter, typename _Function>
_Function
for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
@@ -242,12 +242,12 @@ namespace __parallel
template<typename _FIter, typename _Generator, typename _IterTag>
void
generate_switch(_FIter, _FIter, _Generator, _IterTag);
__generate_switch(_FIter, _FIter, _Generator, _IterTag);
template<typename _RAIter, typename _Generator>
void
generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
template<typename _OIter, typename _Size, typename _Generator>
@@ -265,12 +265,12 @@ namespace __parallel
template<typename _OIter, typename _Size, typename _Generator,
typename _IterTag>
_OIter
generate_n_switch(_OIter, _Size, _Generator, _IterTag);
__generate_n_switch(_OIter, _Size, _Generator, _IterTag);
template<typename _RAIter, typename _Size, typename _Generator>
_RAIter
generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2>
@@ -294,12 +294,12 @@ namespace __parallel
template<typename _IIter1, typename _IIter2,
typename _Predicate, typename _IterTag1, typename _IterTag2>
bool
lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2,
__lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2,
_Predicate, _IterTag1, _IterTag2);
template<typename _RAIter1, typename _RAIter2, typename _Predicate>
bool
lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
__lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
_Predicate, random_access_iterator_tag,
random_access_iterator_tag);
@@ -324,12 +324,12 @@ namespace __parallel
template<typename _IIter1, typename _IIter2, typename _Predicate,
typename _IterTag1, typename _IterTag2>
pair<_IIter1, _IIter2>
mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate,
__mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate,
_IterTag1, _IterTag2);
template<typename _RAIter1, typename _RAIter2, typename _Predicate>
pair<_RAIter1, _RAIter2>
mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate,
__mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate,
random_access_iterator_tag, random_access_iterator_tag);
template<typename _FIter1, typename _FIter2>
@@ -351,23 +351,23 @@ namespace __parallel
template<typename _RAIter1, typename _RAIter2>
_RAIter1
search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
__search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
random_access_iterator_tag, random_access_iterator_tag);
template<typename _FIter1, typename _FIter2, typename _IterTag1,
typename _IterTag2>
_FIter1
search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2);
__search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2);
template<typename _RAIter1, typename _RAIter2, typename _BiPredicate>
_RAIter1
search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate,
__search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate,
random_access_iterator_tag, random_access_iterator_tag);
template<typename _FIter1, typename _FIter2, typename _BiPredicate,
typename _IterTag1, typename _IterTag2>
_FIter1
search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate,
__search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate,
_IterTag1, _IterTag2);
template<typename _FIter, typename _Integer, typename _Tp>
@@ -393,42 +393,42 @@ namespace __parallel
template<typename _RAIter, typename _Integer, typename _Tp,
typename _BiPredicate>
_RAIter
search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&,
__search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&,
_BiPredicate, random_access_iterator_tag);
template<typename _FIter, typename _Integer, typename _Tp,
typename _BiPredicate, typename _IterTag>
_FIter
search_n_switch(_FIter, _FIter, _Integer, const _Tp&,
__search_n_switch(_FIter, _FIter, _Integer, const _Tp&,
_BiPredicate, _IterTag);
template<typename _IIter, typename _OIter, typename UnaryOperation>
template<typename _IIter, typename _OIter, typename _UnaryOperation>
_OIter
transform(_IIter, _IIter, _OIter, UnaryOperation);
transform(_IIter, _IIter, _OIter, _UnaryOperation);
template<typename _IIter, typename _OIter, typename UnaryOperation>
template<typename _IIter, typename _OIter, typename _UnaryOperation>
_OIter
transform(_IIter, _IIter, _OIter, UnaryOperation,
transform(_IIter, _IIter, _OIter, _UnaryOperation,
__gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter, typename UnaryOperation>
template<typename _IIter, typename _OIter, typename _UnaryOperation>
_OIter
transform(_IIter, _IIter, _OIter, UnaryOperation,
transform(_IIter, _IIter, _OIter, _UnaryOperation,
__gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename UnaryOperation,
template<typename _IIter, typename _OIter, typename _UnaryOperation,
typename _IterTag1, typename _IterTag2>
_OIter
transform1_switch(_IIter, _IIter, _OIter, UnaryOperation,
__transform1_switch(_IIter, _IIter, _OIter, _UnaryOperation,
_IterTag1, _IterTag2);
template<typename _RAIIter, typename _RAOIter, typename UnaryOperation>
template<typename _RAIIter, typename _RAOIter, typename _UnaryOperation>
_RAOIter
transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation,
__transform1_switch(_RAIIter, _RAIIter, _RAOIter, _UnaryOperation,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
@@ -452,17 +452,17 @@ namespace __parallel
template<typename _RAIter1, typename _RAIter2, typename _RAIter3,
typename _BiOperation>
_RAIter3
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
__transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2, typename _OIter,
typename _BiOperation, typename _Tag1,
typename _Tag2, typename _Tag3>
_OIter
transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
__transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
_Tag1, _Tag2, _Tag3);
@@ -482,11 +482,11 @@ namespace __parallel
template<typename _FIter, typename _Tp, typename _IterTag>
void
replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag);
__replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag);
template<typename _RAIter, typename _Tp>
void
replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
__replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
random_access_iterator_tag, __gnu_parallel::_Parallelism);
@@ -507,11 +507,11 @@ namespace __parallel
template<typename _FIter, typename _Predicate, typename _Tp,
typename _IterTag>
void
replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag);
__replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag);
template<typename _RAIter, typename _Predicate, typename _Tp>
void
replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
__replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
random_access_iterator_tag,
__gnu_parallel::_Parallelism);
@@ -542,12 +542,12 @@ namespace __parallel
template<typename _FIter, typename _Compare, typename _IterTag>
_FIter
max_element_switch(_FIter, _FIter, _Compare, _IterTag);
__max_element_switch(_FIter, _FIter, _Compare, _IterTag);
template<typename _RAIter, typename _Compare>
_RAIter
max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
@ -575,13 +575,13 @@ namespace __parallel
typename _Compare, typename _IterTag1, typename _IterTag2,
typename _IterTag3>
_OIter
merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
__merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
_IterTag1, _IterTag2, _IterTag3);
template<typename _IIter1, typename _IIter2, typename _OIter,
typename _Compare>
_OIter
merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
__merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag);
@ -596,7 +596,7 @@ namespace __parallel
template<typename _FIter>
_FIter
min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag);
min_element(_FIter, _FIter, __gnu_parallel::_Parallelism __parallelism_tag);
template<typename _FIter, typename _Compare>
_FIter
@ -612,12 +612,12 @@ namespace __parallel
template<typename _FIter, typename _Compare, typename _IterTag>
_FIter
min_element_switch(_FIter, _FIter, _Compare, _IterTag);
__min_element_switch(_FIter, _FIter, _Compare, _IterTag);
template<typename _RAIter, typename _Compare>
_RAIter
min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_balanced);
template<typename _RAIter>
@ -654,21 +654,21 @@ namespace __parallel
void
partial_sort(_RAIter, _RAIter, _RAIter);
template<typename _FIter, typename Predicate>
template<typename _FIter, typename _Predicate>
_FIter
partition(_FIter, _FIter, Predicate, __gnu_parallel::sequential_tag);
partition(_FIter, _FIter, _Predicate, __gnu_parallel::sequential_tag);
template<typename _FIter, typename Predicate>
template<typename _FIter, typename _Predicate>
_FIter
partition(_FIter, _FIter, Predicate);
partition(_FIter, _FIter, _Predicate);
template<typename _FIter, typename Predicate, typename _IterTag>
template<typename _FIter, typename _Predicate, typename _IterTag>
_FIter
partition_switch(_FIter, _FIter, Predicate, _IterTag);
__partition_switch(_FIter, _FIter, _Predicate, _IterTag);
template<typename _RAIter, typename Predicate>
template<typename _RAIter, typename _Predicate>
_RAIter
partition_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag);
__partition_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
template<typename _RAIter>
void
@ -693,9 +693,9 @@ namespace __parallel
__gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _OIter,
typename Predicate>
typename _Predicate>
_OIter
set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate,
set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate,
__gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _OIter>
@ -711,13 +711,13 @@ namespace __parallel
typename _OIter, typename _IterTag1, typename _IterTag2,
typename _IterTag3>
_OIter
set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
__set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
_Predicate, _IterTag1, _IterTag2, _IterTag3);
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
typename _Predicate>
_Output_RAIter
set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter,
__set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter,
_Predicate, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag);
@ -745,13 +745,13 @@ namespace __parallel
typename _OIter, typename _IterTag1, typename _IterTag2,
typename _IterTag3>
_OIter
set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
__set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
_Predicate, _IterTag1, _IterTag2, _IterTag3);
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
typename _Predicate>
_Output_RAIter
set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
__set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
_Output_RAIter, _Predicate,
random_access_iterator_tag,
random_access_iterator_tag,
@ -782,14 +782,14 @@ namespace __parallel
typename _OIter, typename _IterTag1, typename _IterTag2,
typename _IterTag3>
_OIter
set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2,
__set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2,
_OIter, _Predicate, _IterTag1, _IterTag2,
_IterTag3);
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
typename _Predicate>
_Output_RAIter
set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
__set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
_Output_RAIter, _Predicate,
random_access_iterator_tag,
random_access_iterator_tag,
@ -820,13 +820,13 @@ namespace __parallel
typename _OIter, typename _IterTag1, typename _IterTag2,
typename _IterTag3>
_OIter
set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
__set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
_Predicate, _IterTag1, _IterTag2, _IterTag3);
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
typename _Predicate>
_Output_RAIter
set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
__set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
_Output_RAIter, _Predicate,
random_access_iterator_tag,
random_access_iterator_tag,
@ -885,12 +885,12 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _Predicate,
typename _IterTag1, typename _IterTag2>
_OIter
unique_copy_switch(_IIter, _IIter, _OIter, _Predicate,
__unique_copy_switch(_IIter, _IIter, _OIter, _Predicate,
_IterTag1, _IterTag2);
template<typename _RAIter, typename _RandomAccess_OIter, typename _Predicate>
_RandomAccess_OIter
unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate,
__unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate,
random_access_iterator_tag, random_access_iterator_tag);
} // end namespace __parallel
} // end namespace std

View File

@ -58,171 +58,171 @@
namespace __gnu_parallel
{
/** @brief Information local to one thread in the parallel quicksort run. */
template<typename RandomAccessIterator>
struct QSBThreadLocal
template<typename _RAIter>
struct _QSBThreadLocal
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Continuous part of the sequence, described by an
iterator pair. */
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
typedef std::pair<_RAIter, _RAIter> _Piece;
/** @brief Initial piece to work on. */
Piece initial;
_Piece _M_initial;
/** @brief Work-stealing queue. */
RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
_RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
/** @brief Number of threads involved in this algorithm. */
thread_index_t num_threads;
_ThreadIndex __num_threads;
/** @brief Pointer to a counter of elements left over to sort. */
volatile difference_type* elements_leftover;
volatile _DifferenceType* _M_elements_leftover;
/** @brief The complete sequence to sort. */
Piece global;
_Piece _M_global;
/** @brief Constructor.
* @param queue_size Size of the work-stealing queue. */
QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
* @param __queue_size size of the work-stealing queue. */
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
};
/** @brief Balanced quicksort divide step.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
* @pre @c (end-begin)>=1 */
template<typename RandomAccessIterator, typename Comparator>
typename std::iterator_traits<RandomAccessIterator>::difference_type
qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, thread_index_t num_threads)
* @pre @__c (__end-__begin)>=1 */
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__qsb_divide(_RAIter __begin, _RAIter __end,
_Compare __comp, _ThreadIndex __num_threads)
{
_GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
_GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
RandomAccessIterator pivot_pos =
median_of_three_iterators(begin, begin + (end - begin) / 2,
end - 1, comp);
_RAIter __pivot_pos =
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
__end - 1, __comp);
#if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere.
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
_GLIBCXX_PARALLEL_ASSERT(
(!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
|| (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
|| (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
|| (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
|| (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
|| (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
(!__comp(*__pivot_pos, *__begin) && !__comp(*(__begin + __n / 2), *__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*(__begin + __n / 2), *__pivot_pos)));
#endif
// Swap pivot value to end.
if (pivot_pos != (end - 1))
std::swap(*pivot_pos, *(end - 1));
pivot_pos = end - 1;
if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1;
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, *pivot_pos);
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, returning end - begin - 1 in the worst case.
difference_type split_pos = parallel_partition(
begin, end - 1, pred, num_threads);
// Divide, returning __end - __begin - 1 in the worst case.
_DifferenceType __split_pos = __parallel_partition(
__begin, __end - 1, __pred, __num_threads);
// Swap back pivot to middle.
std::swap(*(begin + split_pos), *pivot_pos);
pivot_pos = begin + split_pos;
std::swap(*(__begin + __split_pos), *__pivot_pos);
__pivot_pos = __begin + __split_pos;
#if _GLIBCXX_ASSERTIONS
RandomAccessIterator r;
for (r = begin; r != pivot_pos; ++r)
_GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
for (; r != end; ++r)
_GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
_RAIter __r;
for (__r = __begin; __r != __pivot_pos; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
for (; __r != __end; ++__r)
_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
#endif
return split_pos;
return __split_pos;
}
/** @brief Quicksort conquer step.
* @param tls Array of thread-local storages.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param iam Number of the thread processing this function.
* @param num_threads
* @param __tls Array of thread-local storages.
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __iam Number of the thread processing this function.
* @param __num_threads
* Number of threads that are allowed to work on this part. */
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
thread_index_t iam, thread_index_t num_threads,
bool parent_wait)
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __iam, _ThreadIndex __num_threads,
bool __parent_wait)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
if (num_threads <= 1 || n <= 1)
if (__num_threads <= 1 || __n <= 1)
{
tls[iam]->initial.first = begin;
tls[iam]->initial.second = end;
__tls[__iam]->_M_initial.first = __begin;
__tls[__iam]->_M_initial.second = __end;
qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
return;
}
// Divide step.
difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
_DifferenceType __split_pos = __qsb_divide(__begin, __end, __comp, __num_threads);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && __split_pos < (__end - __begin));
#endif
thread_index_t num_threads_leftside =
std::max<thread_index_t>(1, std::min<thread_index_t>(
num_threads - 1, split_pos * num_threads / n));
_ThreadIndex __num_threads_leftside =
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
__num_threads - 1, __split_pos * __num_threads / __n));
# pragma omp atomic
*tls[iam]->elements_leftover -= (difference_type)1;
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
// Conquer step.
# pragma omp parallel num_threads(2)
{
bool wait;
bool __wait;
if(omp_get_num_threads() < 2)
wait = false;
__wait = false;
else
wait = parent_wait;
__wait = __parent_wait;
# pragma omp sections
{
# pragma omp section
{
qsb_conquer(tls, begin, begin + split_pos, comp,
iam,
num_threads_leftside,
wait);
wait = parent_wait;
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
__iam,
__num_threads_leftside,
__wait);
__wait = __parent_wait;
}
// The pivot_pos is left in place, to ensure termination.
# pragma omp section
{
qsb_conquer(tls, begin + split_pos + 1, end, comp,
iam + num_threads_leftside,
num_threads - num_threads_leftside,
wait);
wait = parent_wait;
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
__iam + __num_threads_leftside,
__num_threads - __num_threads_leftside,
__wait);
__wait = __parent_wait;
}
}
}
@ -230,175 +230,175 @@ template<typename RandomAccessIterator, typename Comparator>
/**
* @brief Quicksort step doing load-balanced local sort.
* @param tls Array of thread-local storages.
* @param comp Comparator.
* @param iam Number of the thread processing this function.
* @param __tls Array of thread-local storages.
* @param __comp Comparator.
* @param __iam Number of the thread processing this function.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
Comparator& comp, int iam, bool wait)
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
_Compare& __comp, int __iam, bool __wait)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::pair<_RAIter, _RAIter> _Piece;
QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
difference_type base_case_n =
_DifferenceType __base_case_n =
_Settings::get().sort_qsb_base_case_maximal_n;
if (base_case_n < 2)
base_case_n = 2;
thread_index_t num_threads = tl.num_threads;
if (__base_case_n < 2)
__base_case_n = 2;
_ThreadIndex __num_threads = __tl.__num_threads;
// Every thread has its own random number generator.
random_number rng(iam + 1);
_RandomNumber __rng(__iam + 1);
Piece current = tl.initial;
_Piece __current = __tl._M_initial;
difference_type elements_done = 0;
_DifferenceType __elements_done = 0;
#if _GLIBCXX_ASSERTIONS
difference_type total_elements_done = 0;
_DifferenceType __total_elements_done = 0;
#endif
for (;;)
{
// Invariant: current must be a valid (maybe empty) range.
RandomAccessIterator begin = current.first, end = current.second;
difference_type n = end - begin;
// Invariant: __current must be a valid (maybe empty) range.
_RAIter __begin = __current.first, __end = __current.second;
_DifferenceType __n = __end - __begin;
if (n > base_case_n)
if (__n > __base_case_n)
{
// Divide.
RandomAccessIterator pivot_pos = begin + rng(n);
_RAIter __pivot_pos = __begin + __rng(__n);
// Swap pivot_pos value to end.
if (pivot_pos != (end - 1))
std::swap(*pivot_pos, *(end - 1));
pivot_pos = end - 1;
// Swap __pivot_pos value to end.
if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1;
__gnu_parallel::binder2nd
<Comparator, value_type, value_type, bool>
pred(comp, *pivot_pos);
<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
RandomAccessIterator split_pos1, split_pos2;
split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1, __pred);
// Left side: < pivot_pos; right side: >= pivot_pos.
// Left side: < __pivot_pos; __right side: >= __pivot_pos.
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 && __split_pos1 < __end);
#endif
// Swap pivot back to middle.
if (split_pos1 != pivot_pos)
std::swap(*split_pos1, *pivot_pos);
pivot_pos = split_pos1;
if (__split_pos1 != __pivot_pos)
std::swap(*__split_pos1, *__pivot_pos);
__pivot_pos = __split_pos1;
// In case all elements are equal, split_pos1 == 0.
if ((split_pos1 + 1 - begin) < (n >> 7)
|| (end - split_pos1) < (n >> 7))
// In case all elements are equal, __split_pos1 == 0.
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7))
{
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::unary_negate<__gnu_parallel::binder1st
<Comparator, value_type, value_type, bool>, value_type>
pred(__gnu_parallel::binder1st
<Comparator, value_type, value_type, bool>(comp,
*pivot_pos));
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>(__comp,
*__pivot_pos));
// Find other end of pivot-equal range.
split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
end, pred);
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
}
else
// Only skip the pivot.
split_pos2 = split_pos1 + 1;
__split_pos2 = __split_pos1 + 1;
// Elements equal to pivot are done.
elements_done += (split_pos2 - split_pos1);
__elements_done += (__split_pos2 - __split_pos1);
#if _GLIBCXX_ASSERTIONS
total_elements_done += (split_pos2 - split_pos1);
__total_elements_done += (__split_pos2 - __split_pos1);
#endif
// Always push larger part onto stack.
if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
{
// Right side larger.
if ((split_pos2) != end)
tl.leftover_parts.push_front(std::make_pair(split_pos2,
end));
if ((__split_pos2) != __end)
__tl._M_leftover_parts.push_front(std::make_pair(__split_pos2,
__end));
//current.first = begin; //already set anyway
current.second = split_pos1;
//__current.first = __begin; //already set anyway
__current.second = __split_pos1;
continue;
}
else
{
// Left side larger.
if (begin != split_pos1)
tl.leftover_parts.push_front(std::make_pair(begin,
split_pos1));
if (__begin != __split_pos1)
__tl._M_leftover_parts.push_front(std::make_pair(__begin,
__split_pos1));
current.first = split_pos2;
//current.second = end; //already set anyway
__current.first = __split_pos2;
//__current.second = __end; //already set anyway
continue;
}
}
else
{
__gnu_sequential::sort(begin, end, comp);
elements_done += n;
__gnu_sequential::sort(__begin, __end, __comp);
__elements_done += __n;
#if _GLIBCXX_ASSERTIONS
total_elements_done += n;
__total_elements_done += __n;
#endif
// Prefer own stack, small pieces.
if (tl.leftover_parts.pop_front(current))
if (__tl._M_leftover_parts.pop_front(__current))
continue;
# pragma omp atomic
*tl.elements_leftover -= elements_done;
*__tl._M_elements_leftover -= __elements_done;
elements_done = 0;
__elements_done = 0;
#if _GLIBCXX_ASSERTIONS
double search_start = omp_get_wtime();
double __search_start = omp_get_wtime();
#endif
// Look for new work.
bool successfully_stolen = false;
while (wait && *tl.elements_leftover > 0 && !successfully_stolen
bool __successfully_stolen = false;
while (__wait && *__tl._M_elements_leftover > 0 && !__successfully_stolen
#if _GLIBCXX_ASSERTIONS
// Possible dead-lock.
&& (omp_get_wtime() < (search_start + 1.0))
&& (omp_get_wtime() < (__search_start + 1.0))
#endif
)
{
thread_index_t victim;
victim = rng(num_threads);
_ThreadIndex __victim;
__victim = __rng(__num_threads);
// Large pieces.
successfully_stolen = (victim != iam)
&& tls[victim]->leftover_parts.pop_back(current);
if (!successfully_stolen)
yield();
__successfully_stolen = (__victim != __iam)
&& __tls[__victim]->_M_leftover_parts.pop_back(__current);
if (!__successfully_stolen)
__yield();
#if !defined(__ICC) && !defined(__ECC)
# pragma omp flush
#endif
}
#if _GLIBCXX_ASSERTIONS
if (omp_get_wtime() >= (search_start + 1.0))
if (omp_get_wtime() >= (__search_start + 1.0))
{
sleep(1);
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
< (search_start + 1.0));
< (__search_start + 1.0));
}
#endif
if (!successfully_stolen)
if (!__successfully_stolen)
{
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
_GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
#endif
return;
}
@ -407,70 +407,70 @@ template<typename RandomAccessIterator, typename Comparator>
}
/** @brief Top-level quicksort routine.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::pair<_RAIter, _RAIter> _Piece;
typedef QSBThreadLocal<RandomAccessIterator> tls_type;
typedef _QSBThreadLocal<_RAIter> _TLSType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
if (n <= 1)
if (__n <= 1)
return;
// At least one element per processor.
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// Initialize thread local storage
tls_type** tls = new tls_type*[num_threads];
difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
for (thread_index_t t = 0; t < num_threads; ++t)
tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
_TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size = __num_threads * (_ThreadIndex)(log2(__n) + 1);
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(log2(n)) ranges on the stack, because
// There can never be more than ceil(log2(__n)) ranges on the stack, because
// 1. Only one processor pushes onto the stack
// 2. The largest range has at most length n
// 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining
volatile difference_type elements_leftover = n;
for (int i = 0; i < num_threads; ++i)
volatile _DifferenceType _M_elements_leftover = __n;
for (int __i = 0; __i < __num_threads; ++__i)
{
tls[i]->elements_leftover = &elements_leftover;
tls[i]->num_threads = num_threads;
tls[i]->global = std::make_pair(begin, end);
__tls[__i]->_M_elements_leftover = &_M_elements_leftover;
__tls[__i]->__num_threads = __num_threads;
__tls[__i]->_M_global = std::make_pair(__begin, __end);
// Just in case nothing is left to assign.
tls[i]->initial = std::make_pair(end, end);
__tls[__i]->_M_initial = std::make_pair(__end, __end);
}
// Main recursion call.
qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
#if _GLIBCXX_ASSERTIONS
// All stack must be empty.
Piece dummy;
for (int i = 1; i < num_threads; ++i)
_GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
_Piece __dummy;
for (int __i = 1; __i < __num_threads; ++__i)
_GLIBCXX_PARALLEL_ASSERT(!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
#endif
for (int i = 0; i < num_threads; ++i)
delete tls[i];
delete[] tls;
for (int __i = 0; __i < __num_threads; ++__i)
delete __tls[__i];
delete[] __tls;
}
} // namespace __gnu_parallel

View File

@ -82,7 +82,7 @@ namespace __gnu_parallel
// and active, which imples that the OpenMP runtime is actually
// going to be linked in.
inline int
get_max_threads()
__get_max_threads()
{
int __i = omp_get_max_threads();
return __i > 1 ? __i : 1;
@ -90,91 +90,91 @@ namespace __gnu_parallel
inline bool
is_parallel(const _Parallelism __p) { return __p != sequential; }
__is_parallel(const _Parallelism __p) { return __p != sequential; }
// XXX remove std::duplicates from here if possible,
// XXX but keep minimal dependencies.
/** @brief Calculates the rounded-down logarithm of @c n for base 2.
* @param n Argument.
/** @brief Calculates the rounded-down logarithm of @__c __n for base 2.
* @param __n Argument.
* @return Returns 0 for any argument <1.
*/
template<typename Size>
inline Size
__log2(Size n)
template<typename _Size>
inline _Size
__log2(_Size __n)
{
Size k;
for (k = 0; n > 1; n >>= 1)
++k;
return k;
_Size __k;
for (__k = 0; __n > 1; __n >>= 1)
++__k;
return __k;
}
/** @brief Encode two integers into one __gnu_parallel::lcas_t.
* @param a First integer, to be encoded in the most-significant @c
* lcas_t_bits/2 bits.
* @param b Second integer, to be encoded in the least-significant
* @c lcas_t_bits/2 bits.
* @return __gnu_parallel::lcas_t value encoding @c a and @c b.
/** @brief Encode two integers into one __gnu_parallel::_CASable.
* @param __a First integer, to be encoded in the most-significant @__c
* _CASable_bits/2 bits.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits.
* @return __gnu_parallel::_CASable _M_value encoding @__c __a and @__c __b.
* @see decode2
*/
inline lcas_t
encode2(int a, int b) //must all be non-negative, actually
inline _CASable
__encode2(int __a, int __b) //must all be non-negative, actually
{
return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0);
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
}
/** @brief Decode two integers from one __gnu_parallel::lcas_t.
* @param x __gnu_parallel::lcas_t to decode integers from.
* @param a First integer, to be decoded from the most-significant
* @c lcas_t_bits/2 bits of @c x.
* @param b Second integer, to be encoded in the least-significant
* @c lcas_t_bits/2 bits of @c x.
* @see encode2
/** @brief Decode two integers from one __gnu_parallel::_CASable.
* @param __x __gnu_parallel::_CASable to decode integers from.
* @param __a First integer, to be decoded from the most-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @see __encode2
*/
inline void
decode2(lcas_t x, int& a, int& b)
decode2(_CASable __x, int& __a, int& __b)
{
a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask);
b = (int)((x >> 0 ) & lcas_t_mask);
__a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask);
__b = (int)((__x >> 0 ) & _CASable_mask);
}
/** @brief Equivalent to std::min. */
template<typename T>
const T&
min(const T& a, const T& b)
{ return (a < b) ? a : b; }
template<typename _Tp>
const _Tp&
min(const _Tp& __a, const _Tp& __b)
{ return (__a < __b) ? __a : __b; }
/** @brief Equivalent to std::max. */
template<typename T>
const T&
max(const T& a, const T& b)
{ return (a > b) ? a : b; }
template<typename _Tp>
const _Tp&
max(const _Tp& __a, const _Tp& __b)
{ return (__a > __b) ? __a : __b; }
/** @brief Constructs predicate for equality from strict weak
* ordering predicate
*/
// XXX comparator at the end, as per others
template<typename Comparator, typename T1, typename T2>
class equal_from_less : public std::binary_function<T1, T2, bool>
template<typename _Compare, typename _T1, typename _T2>
class _EqualFromLess : public std::binary_function<_T1, _T2, bool>
{
private:
Comparator& comp;
_Compare& __comp;
public:
equal_from_less(Comparator& _comp) : comp(_comp) { }
_EqualFromLess(_Compare& _comp) : __comp(_comp) { }
bool operator()(const T1& a, const T2& b)
bool operator()(const _T1& __a, const _T2& __b)
{
return !comp(a, b) && !comp(b, a);
return !__comp(__a, __b) && !__comp(__b, __a);
}
};
/** @brief Similar to std::binder1st,
/** @brief Similar to std::__binder1st,
* but giving the argument types explicitly. */
template<typename _Predicate, typename argument_type>
class unary_negate
class __unary_negate
: public std::unary_function<argument_type, bool>
{
protected:
@ -182,93 +182,93 @@ template<typename _Predicate, typename argument_type>
public:
explicit
unary_negate(const _Predicate& __x) : _M_pred(__x) { }
__unary_negate(const _Predicate& __x) : _M_pred(__x) { }
bool
operator()(const argument_type& __x)
{ return !_M_pred(__x); }
};
/** @brief Similar to std::binder1st,
/** @brief Similar to std::__binder1st,
* but giving the argument types explicitly. */
template<typename _Operation, typename first_argument_type,
typename second_argument_type, typename result_type>
class binder1st
: public std::unary_function<second_argument_type, result_type>
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class __binder1st
: public std::unary_function<_SecondArgumentType, _ResultType>
{
protected:
_Operation op;
first_argument_type value;
_Operation _M_op;
_FirstArgumentType _M_value;
public:
binder1st(const _Operation& __x,
const first_argument_type& __y)
: op(__x), value(__y) { }
__binder1st(const _Operation& __x,
const _FirstArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
result_type
operator()(const second_argument_type& __x)
{ return op(value, __x); }
_ResultType
operator()(const _SecondArgumentType& __x)
{ return _M_op(_M_value, __x); }
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
result_type
operator()(second_argument_type& __x) const
{ return op(value, __x); }
// 109. Missing binders for non-const __sequence __elements
_ResultType
operator()(_SecondArgumentType& __x) const
{ return _M_op(_M_value, __x); }
};
/**
* @brief Similar to std::binder2nd, but giving the argument types
* explicitly.
*/
template<typename _Operation, typename first_argument_type,
typename second_argument_type, typename result_type>
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class binder2nd
: public std::unary_function<first_argument_type, result_type>
: public std::unary_function<_FirstArgumentType, _ResultType>
{
protected:
_Operation op;
second_argument_type value;
_Operation _M_op;
_SecondArgumentType _M_value;
public:
binder2nd(const _Operation& __x,
const second_argument_type& __y)
: op(__x), value(__y) { }
const _SecondArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
result_type
operator()(const first_argument_type& __x) const
{ return op(__x, value); }
_ResultType
operator()(const _FirstArgumentType& __x) const
{ return _M_op(__x, _M_value); }
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
result_type
operator()(first_argument_type& __x)
{ return op(__x, value); }
// 109. Missing binders for non-const __sequence __elements
_ResultType
operator()(_FirstArgumentType& __x)
{ return _M_op(__x, _M_value); }
};
/** @brief Similar to std::equal_to, but allows two different types. */
template<typename T1, typename T2>
struct equal_to : std::binary_function<T1, T2, bool>
template<typename _T1, typename _T2>
struct equal_to : std::binary_function<_T1, _T2, bool>
{
bool operator()(const T1& t1, const T2& t2) const
{ return t1 == t2; }
bool operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 == __t2; }
};
/** @brief Similar to std::less, but allows two different types. */
template<typename T1, typename T2>
struct less : std::binary_function<T1, T2, bool>
template<typename _T1, typename _T2>
struct _Less : std::binary_function<_T1, _T2, bool>
{
bool
operator()(const T1& t1, const T2& t2) const
{ return t1 < t2; }
operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 < __t2; }
bool
operator()(const T2& t2, const T1& t1) const
{ return t2 < t1; }
operator()(const _T2& __t2, const _T1& __t1) const
{ return __t2 < __t1; }
};
// Partial specialization for one type. Same as std::less.
template<typename _Tp>
struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
{
bool
operator()(const _Tp& __x, const _Tp& __y) const
@ -278,24 +278,24 @@ struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
/** @brief Similar to std::plus, but allows two different types. */
template<typename _Tp1, typename _Tp2>
struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
+ *static_cast<_Tp2*>(NULL)) result;
+ *static_cast<_Tp2*>(NULL)) __result;
result
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x + __y; }
};
// Partial specialization for one type. Same as std::plus.
template<typename _Tp>
struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
+ *static_cast<_Tp*>(NULL)) result;
+ *static_cast<_Tp*>(NULL)) __result;
result
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x + __y; }
};
@ -303,164 +303,164 @@ template<typename _Tp>
/** @brief Similar to std::multiplies, but allows two different types. */
template<typename _Tp1, typename _Tp2>
struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
* *static_cast<_Tp2*>(NULL)) result;
* *static_cast<_Tp2*>(NULL)) __result;
result
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x * __y; }
};
// Partial specialization for one type. Same as std::multiplies.
template<typename _Tp>
struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
* *static_cast<_Tp*>(NULL)) result;
* *static_cast<_Tp*>(NULL)) __result;
result
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x * __y; }
};
template<typename T, typename _DifferenceTp>
class pseudo_sequence;
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence;
/** @brief Iterator associated with __gnu_parallel::pseudo_sequence.
/** @brief _Iterator associated with __gnu_parallel::_PseudoSequence.
* If features the usual random-access iterator functionality.
* @param T Sequence value type.
* @param difference_type Sequence difference type.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename T, typename _DifferenceTp>
class pseudo_sequence_iterator
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequenceIterator
{
public:
typedef _DifferenceTp difference_type;
typedef _DifferenceTp _DifferenceType;
private:
typedef pseudo_sequence_iterator<T, _DifferenceTp> type;
typedef _PseudoSequenceIterator<_Tp, _DifferenceTp> _Self;
const T& val;
difference_type pos;
const _Tp& _M_val;
_DifferenceType _M_pos;
public:
pseudo_sequence_iterator(const T& val, difference_type pos)
: val(val), pos(pos) { }
_PseudoSequenceIterator(const _Tp& _M_val, _DifferenceType _M_pos)
: _M_val(_M_val), _M_pos(_M_pos) { }
// Pre-increment operator.
type&
_Self&
operator++()
{
++pos;
++_M_pos;
return *this;
}
// Post-increment operator.
const type
const _Self
operator++(int)
{ return type(pos++); }
{ return _Self(_M_pos++); }
const T&
const _Tp&
operator*() const
{ return val; }
{ return _M_val; }
const T&
operator[](difference_type) const
{ return val; }
const _Tp&
operator[](_DifferenceType) const
{ return _M_val; }
bool
operator==(const type& i2)
{ return pos == i2.pos; }
operator==(const _Self& __i2)
{ return _M_pos == __i2._M_pos; }
difference_type
operator!=(const type& i2)
{ return pos != i2.pos; }
_DifferenceType
operator!=(const _Self& __i2)
{ return _M_pos != __i2._M_pos; }
difference_type
operator-(const type& i2)
{ return pos - i2.pos; }
_DifferenceType
operator-(const _Self& __i2)
{ return _M_pos - __i2._M_pos; }
};
/** @brief Sequence that conceptually consists of multiple copies of
the same element.
* The copies are not stored explicitly, of course.
* @param T Sequence value type.
* @param difference_type Sequence difference type.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename T, typename _DifferenceTp>
class pseudo_sequence
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence
{
typedef pseudo_sequence<T, _DifferenceTp> type;
typedef _PseudoSequence<_Tp, _DifferenceTp> _Self;
public:
typedef _DifferenceTp difference_type;
typedef _DifferenceTp _DifferenceType;
// Better case down to uint64, than up to _DifferenceTp.
typedef pseudo_sequence_iterator<T, uint64> iterator;
typedef _PseudoSequenceIterator<_Tp, uint64> iterator;
/** @brief Constructor.
* @param val Element of the sequence.
* @param count Number of (virtual) copies.
* @param _M_val Element of the sequence.
* @param __count Number of (virtual) copies.
*/
pseudo_sequence(const T& val, difference_type count)
: val(val), count(count) { }
_PseudoSequence(const _Tp& _M_val, _DifferenceType __count)
: _M_val(_M_val), __count(__count) { }
/** @brief Begin iterator. */
iterator
begin() const
{ return iterator(val, 0); }
{ return iterator(_M_val, 0); }
/** @brief End iterator. */
iterator
end() const
{ return iterator(val, count); }
{ return iterator(_M_val, __count); }
private:
const T& val;
difference_type count;
const _Tp& _M_val;
_DifferenceType __count;
};
/** @brief Functor that does nothing */
template<typename _ValueTp>
class void_functor
class _VoidFunctor
{
inline void
operator()(const _ValueTp& v) const { }
operator()(const _ValueTp& __v) const { }
};
/** @brief Compute the median of three referenced elements,
according to @c comp.
* @param a First iterator.
* @param b Second iterator.
* @param c Third iterator.
* @param comp Comparator.
according to @__c __comp.
* @param __a First iterator.
* @param __b Second iterator.
* @param __c Third iterator.
* @param __comp Comparator.
*/
template<typename RandomAccessIterator, typename Comparator>
RandomAccessIterator
median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
RandomAccessIterator c, Comparator& comp)
template<typename _RAIter, typename _Compare>
_RAIter
__median_of_three_iterators(_RAIter __a, _RAIter __b,
_RAIter __c, _Compare& __comp)
{
if (comp(*a, *b))
if (comp(*b, *c))
return b;
if (__comp(*__a, *__b))
if (__comp(*__b, *__c))
return __b;
else
if (comp(*a, *c))
return c;
if (__comp(*__a, *__c))
return __c;
else
return a;
return __a;
else
{
// Just swap a and b.
if (comp(*a, *c))
return a;
// Just swap __a and __b.
if (__comp(*__a, *__c))
return __a;
else
if (comp(*b, *c))
return c;
if (__comp(*__b, *__c))
return __c;
else
return b;
return __b;
}
}

View File

@ -39,115 +39,115 @@
namespace __gnu_parallel
{
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
// XXX Comparator default template argument
template<typename InputIterator, typename Comparator>
// XXX Compare default template argument
template<typename _IIter, typename _Compare>
bool
is_sorted(InputIterator begin, InputIterator end,
Comparator comp
= std::less<typename std::iterator_traits<InputIterator>::
value_type>())
__is_sorted(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator current(begin), recent(begin);
_IIter __current(__begin), __recent(__begin);
unsigned long long position = 1;
for (current++; current != end; current++)
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (comp(*current, *recent))
if (__comp(*__current, *__recent))
{
printf("is_sorted: check failed before position %i.\n",
position);
printf("__is_sorted: check failed before position %__i.\n",
__position);
return false;
}
recent = current;
position++;
__recent = __current;
__position++;
}
return true;
}
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* Prints the position in case an unordered pair is found.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param first_failure The first failure is returned in this variable.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __first_failure The first failure is returned in this variable.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
// XXX Comparator default template argument
template<typename InputIterator, typename Comparator>
// XXX Compare default template argument
template<typename _IIter, typename _Compare>
bool
is_sorted_failure(InputIterator begin, InputIterator end,
InputIterator& first_failure,
Comparator comp
= std::less<typename std::iterator_traits<InputIterator>::
value_type>())
is_sorted_failure(_IIter __begin, _IIter __end,
_IIter& __first_failure,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator current(begin), recent(begin);
_IIter __current(__begin), __recent(__begin);
unsigned long long position = 1;
for (current++; current != end; current++)
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (comp(*current, *recent))
if (__comp(*__current, *__recent))
{
first_failure = current;
printf("is_sorted: check failed before position %lld.\n",
position);
__first_failure = __current;
printf("__is_sorted: check failed before position %lld.\n",
__position);
return false;
}
recent = current;
position++;
__recent = __current;
__position++;
}
first_failure = end;
__first_failure = __end;
return true;
}
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* Prints all unordered pair, including the surrounding two elements.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
template<typename InputIterator, typename Comparator>
template<typename _IIter, typename _Compare>
bool
// XXX Comparator default template argument
is_sorted_print_failures(InputIterator begin, InputIterator end,
Comparator comp
// XXX Compare default template argument
is_sorted_print_failures(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits
<InputIterator>::value_type>())
<_IIter>::value_type>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator recent(begin);
bool ok = true;
_IIter __recent(__begin);
bool __ok = true;
for (InputIterator pos(begin + 1); pos != end; pos++)
for (_IIter __pos(__begin + 1); __pos != __end; __pos++)
{
if (comp(*pos, *recent))
if (__comp(*__pos, *__recent))
{
printf("%ld: %d %d %d %d\n", pos - begin, *(pos - 2),
*(pos- 1), *pos, *(pos + 1));
ok = false;
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
*(__pos- 1), *__pos, *(__pos + 1));
__ok = false;
}
recent = pos;
__recent = __pos;
}
return ok;
return __ok;
}
}

View File

@ -61,24 +61,24 @@ __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
namespace __gnu_parallel
{
#if defined(__ICC)
template<typename must_be_int = int>
int32 faa32(int32* x, int32 inc)
template<typename _MustBeInt = int>
int32 __faa32(int32* __x, int32 __inc)
{
asm volatile("lock xadd %0,%1"
: "=r" (inc), "=m" (*x)
: "0" (inc)
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
return inc;
return __inc;
}
#if defined(__x86_64)
template<typename must_be_int = int>
int64 faa64(int64* x, int64 inc)
template<typename _MustBeInt = int>
int64 __faa64(int64* __x, int64 __inc)
{
asm volatile("lock xadd %0,%1"
: "=r" (inc), "=m" (*x)
: "0" (inc)
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
return inc;
return __inc;
}
#endif
#endif
@ -88,106 +88,106 @@ namespace __gnu_parallel
/** @brief Add a value to a variable, atomically.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to a 32-bit signed integer.
* @param addend Value to add.
* @param __ptr Pointer to a 32-bit signed integer.
* @param __addend Value to add.
*/
inline int32
fetch_and_add_32(volatile int32* ptr, int32 addend)
__fetch_and_add_32(volatile int32* __ptr, int32 __addend)
{
#if defined(__ICC) //x86 version
return _InterlockedExchangeAdd((void*)ptr, addend);
return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd((void*)ptr, addend);
return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
addend);
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
__addend);
#elif defined(__GNUC__)
return __sync_fetch_and_add(ptr, addend);
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int32 before, after;
volatile int32 __before, __after;
do
{
before = *ptr;
after = before + addend;
} while (atomic_cas_32((volatile unsigned int*)ptr, before,
after) != before);
return before;
__before = *__ptr;
__after = __before + __addend;
} while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
__after) != __before);
return __before;
#else //fallback, slow
#pragma message("slow fetch_and_add_32")
int32 res;
#pragma message("slow __fetch_and_add_32")
int32 __res;
#pragma omp critical
{
res = *ptr;
*(ptr) += addend;
__res = *__ptr;
*(__ptr) += __addend;
}
return res;
return __res;
#endif
}
/** @brief Add a value to a variable, atomically.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to a 64-bit signed integer.
* @param addend Value to add.
* @param __ptr Pointer to a 64-bit signed integer.
* @param __addend Value to add.
*/
inline int64
fetch_and_add_64(volatile int64* ptr, int64 addend)
__fetch_and_add_64(volatile int64* __ptr, int64 __addend)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
return faa64<int>((int64*)ptr, addend);
return __faa64<int>((int64*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd64((void*)ptr, addend);
return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0;
#else
return _InterlockedExchangeAdd64(ptr, addend);
return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
return __sync_fetch_and_add(ptr, addend);
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_fetch_and_add(ptr, addend);
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int64 before, after;
volatile int64 __before, __after;
do
{
before = *ptr;
after = before + addend;
} while (atomic_cas_64((volatile unsigned long long*)ptr, before,
after) != before);
return before;
__before = *__ptr;
__after = __before + __addend;
} while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
__after) != __before);
return __before;
#else //fallback, slow
#if defined(__GNUC__) && defined(__i386)
// XXX doesn't work with -march=native
// XXX doesn'__t work with -march=native
//#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow fetch_and_add_64")
int64 res;
#pragma message("slow __fetch_and_add_64")
int64 __res;
#pragma omp critical
{
res = *ptr;
*(ptr) += addend;
__res = *__ptr;
*(__ptr) += __addend;
}
return res;
return __res;
#endif
}
/** @brief Add a value to a variable, atomically.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to a signed integer.
* @param addend Value to add.
* @param __ptr Pointer to a signed integer.
* @param __addend Value to add.
*/
template<typename T>
inline T
fetch_and_add(volatile T* ptr, T addend)
template<typename _Tp>
inline _Tp
__fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
{
if (sizeof(T) == sizeof(int32))
return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
else if (sizeof(T) == sizeof(int64))
return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
if (sizeof(_Tp) == sizeof(int32))
return (_Tp)__fetch_and_add_32((volatile int32*) __ptr, (int32)__addend);
else if (sizeof(_Tp) == sizeof(int64))
return (_Tp)__fetch_and_add_64((volatile int64*) __ptr, (int64)__addend);
else
_GLIBCXX_PARALLEL_ASSERT(false);
}
@ -195,141 +195,141 @@ namespace __gnu_parallel
#if defined(__ICC)
template<typename must_be_int = int>
template<typename _MustBeInt = int>
inline int32
cas32(volatile int32* ptr, int32 old, int32 nw)
__cas32(volatile int32* __ptr, int32 __old, int32 __nw)
{
int32 before;
int32 __before;
__asm__ __volatile__("lock; cmpxchgl %1,%2"
: "=a"(before)
: "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
: "memory");
return before;
return __before;
}
#if defined(__x86_64)
template<typename must_be_int = int>
template<typename _MustBeInt = int>
inline int64
cas64(volatile int64 *ptr, int64 old, int64 nw)
__cas64(volatile int64 *__ptr, int64 __old, int64 __nw)
{
int64 before;
int64 __before;
__asm__ __volatile__("lock; cmpxchgq %1,%2"
: "=a"(before)
: "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
: "memory");
return before;
return __before;
}
#endif
#endif
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
* *ptr=replacement and return @c true, return @c false otherwise.
/** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c
* *__ptr=__replacement and return @__c true, return @__c false otherwise.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to 32-bit signed integer.
* @param comparand Compare value.
* @param replacement Replacement value.
* @param __ptr Pointer to 32-bit signed integer.
* @param __comparand Compare value.
* @param __replacement Replacement value.
*/
inline bool
compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
__compare_and_swap_32(volatile int32* __ptr, int32 __comparand, int32 __replacement)
{
#if defined(__ICC) //x86 version
return _InterlockedCompareExchange((void*)ptr, replacement,
comparand) == comparand;
return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand;
#elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange((void*)ptr, replacement,
comparand) == comparand;
return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
replacement, comparand) == comparand;
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr),
__replacement, __comparand) == __comparand;
#elif defined(__GNUC__)
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_32((volatile unsigned int*)ptr, comparand,
replacement) == comparand;
return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
__replacement) == __comparand;
#else
#pragma message("slow compare_and_swap_32")
bool res = false;
#pragma message("slow __compare_and_swap_32")
bool __res = false;
#pragma omp critical
{
if (*ptr == comparand)
if (*__ptr == __comparand)
{
*ptr = replacement;
res = true;
*__ptr = __replacement;
__res = true;
}
}
return res;
return __res;
#endif
}
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
* *ptr=replacement and return @c true, return @c false otherwise.
/** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c
* *__ptr=__replacement and return @__c true, return @__c false otherwise.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to 64-bit signed integer.
* @param comparand Compare value.
* @param replacement Replacement value.
* @param __ptr Pointer to 64-bit signed integer.
* @param __comparand Compare value.
* @param __replacement Replacement value.
*/
inline bool
compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
__compare_and_swap_64(volatile int64* __ptr, int64 __comparand, int64 __replacement)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
return cas64<int>(ptr, comparand, replacement) == comparand;
return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange64((void*)ptr, replacement,
comparand) == comparand;
return _InterlockedCompareExchange64((void*)__ptr, __replacement,
__comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0;
#else
return _InterlockedCompareExchange64(ptr, replacement,
comparand) == comparand;
return _InterlockedCompareExchange64(__ptr, __replacement,
__comparand) == __comparand;
#endif
#elif defined(__GNUC__) && defined(__x86_64)
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_64((volatile unsigned long long*)ptr,
comparand, replacement) == comparand;
return atomic_cas_64((volatile unsigned long long*)__ptr,
__comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
// XXX -march=native
//#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow compare_and_swap_64")
bool res = false;
#pragma message("slow __compare_and_swap_64")
bool __res = false;
#pragma omp critical
{
if (*ptr == comparand)
if (*__ptr == __comparand)
{
*ptr = replacement;
res = true;
*__ptr = __replacement;
__res = true;
}
}
return res;
return __res;
#endif
}
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
* *ptr=replacement and return @c true, return @c false otherwise.
/** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c
* *__ptr=__replacement and return @__c true, return @__c false otherwise.
*
* Implementation is heavily platform-dependent.
* @param ptr Pointer to signed integer.
* @param comparand Compare value.
* @param replacement Replacement value. */
template<typename T>
* @param __ptr Pointer to signed integer.
* @param __comparand Compare value.
* @param __replacement Replacement value. */
template<typename _Tp>
inline bool
compare_and_swap(volatile T* ptr, T comparand, T replacement)
__compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
{
if (sizeof(T) == sizeof(int32))
return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement);
else if (sizeof(T) == sizeof(int64))
return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement);
if (sizeof(_Tp) == sizeof(int32))
return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, (int32)__replacement);
else if (sizeof(_Tp) == sizeof(int64))
return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, (int64)__replacement);
else
_GLIBCXX_PARALLEL_ASSERT(false);
}
@ -337,7 +337,7 @@ namespace __gnu_parallel
/** @brief Yield the control to another thread, without waiting for
the end to the time slice. */
inline void
yield()
__yield()
{
#if defined (_WIN32) && !defined (__CYGWIN__)
Sleep(0);

View File

@ -38,15 +38,15 @@
/** @def _GLIBCXX_CALL
* @brief Macro to produce log message when entering a function.
* @param n Input size.
* @param __n Input size.
* @see _GLIBCXX_VERBOSE_LEVEL */
#if (_GLIBCXX_VERBOSE_LEVEL == 0)
#define _GLIBCXX_CALL(n)
#define _GLIBCXX_CALL(__n)
#endif
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
#define _GLIBCXX_CALL(n) \
printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", \
__PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
#define _GLIBCXX_CALL(__n) \
printf(" %__s:\niam = %d, __n = %ld, __num_threads = %d\n", \
__PRETTY_FUNCTION__, omp_get_thread_num(), (__n), __get_max_threads());
#endif
#ifndef _GLIBCXX_SCALE_DOWN_FPU
@ -64,12 +64,12 @@
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the L1 cache for
* __gnu_parallel::parallel_random_shuffle(). */
* gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the TLB for
* __gnu_parallel::parallel_random_shuffle(). */
* gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif

View File

@ -33,54 +33,54 @@
namespace __gnu_parallel
{
/** @brief Function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* The resulting sequence s of length num_threads+1 contains the splitting
* positions when splitting the range [0,n) into parts of almost
* The resulting sequence __s of length __num_threads+1 contains the splitting
* positions when splitting the range [0,__n) into parts of almost
* equal size (plus minus 1). The first entry is 0, the last one
* n. There may result empty parts.
* @param n Number of elements
* @param num_threads Number of parts
* @param s Splitters
* @returns End of splitter sequence, i. e. @c s+num_threads+1 */
template<typename difference_type, typename OutputIterator>
OutputIterator
equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
* n. There may result empty parts.
* @param __n Number of elements
* @param __num_threads Number of parts
* @param __s Splitters
* @returns End of splitter sequence, i.e. @__c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
{
difference_type chunk_length = n / num_threads;
difference_type num_longer_chunks = n % num_threads;
difference_type pos = 0;
for (thread_index_t i = 0; i < num_threads; ++i)
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads;
_DifferenceType __pos = 0;
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
*s++ = pos;
pos += (i < num_longer_chunks) ? (chunk_length + 1) : chunk_length;
*__s++ = __pos;
__pos += (__i < __num_longer_chunks) ? (__chunk_length + 1) : __chunk_length;
}
*s++ = n;
return s;
*__s++ = __n;
return __s;
}
/** @brief Function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* Returns the position of the splitting point between
* thread number thread_no (included) and
* thread number thread_no+1 (excluded).
* @param n Number of elements
* @param num_threads Number of parts
* @returns _SplittingAlgorithm point */
template<typename difference_type>
difference_type
equally_split_point(difference_type n,
thread_index_t num_threads,
thread_index_t thread_no)
* thread number __thread_no (included) and
* thread number __thread_no+1 (excluded).
* @param __n Number of elements
* @param __num_threads Number of parts
* @returns splitting point */
template<typename _DifferenceType>
_DifferenceType
equally_split_point(_DifferenceType __n,
_ThreadIndex __num_threads,
_ThreadIndex __thread_no)
{
difference_type chunk_length = n / num_threads;
difference_type num_longer_chunks = n % num_threads;
if (thread_no < num_longer_chunks)
return thread_no * (chunk_length + 1);
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads;
if (__thread_no < __num_longer_chunks)
return __thread_no * (__chunk_length + 1);
else
return num_longer_chunks * (chunk_length + 1)
+ (thread_no - num_longer_chunks) * chunk_length;
return __num_longer_chunks * (__chunk_length + 1)
+ (__thread_no - __num_longer_chunks) * __chunk_length;
}
}

View File

@ -78,7 +78,7 @@
#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING
/** @def _GLIBCXX_TREE_INITIAL_SPLITTING
* @brief Include the initial splitting variant for
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end).
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_INITIAL_SPLITTING 1
#endif
@ -86,7 +86,7 @@
#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING
/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING
* @brief Include the dynamic balancing variant for
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end).
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1
#endif
@ -94,7 +94,7 @@
#ifndef _GLIBCXX_TREE_FULL_COPY
/** @def _GLIBCXX_TREE_FULL_COPY
* @brief In order to sort the input sequence of
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end) a
* full copy of the input elements is done.
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_FULL_COPY 1

View File

@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/find.h
* @brief Parallel implementation base for std::find(), std::equal()
* @brief Parallel implementation base for std::find(), std::equal()
* and related functions.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
@ -44,36 +44,36 @@ namespace __gnu_parallel
{
/**
* @brief Parallel std::find, switch for different algorithms.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence. Must have same
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Must have same
* length as first sequence.
* @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @param __pred Find predicate.
* @param __selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences.
*/
template<typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename Pred,
typename Selector>
inline std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred, Selector selector)
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
inline std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector)
{
switch (_Settings::get().find_algorithm)
{
case GROWING_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector,
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
growing_blocks_tag());
case CONSTANT_SIZE_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector,
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
constant_size_blocks_tag());
case EQUAL_SPLIT:
return find_template(begin1, end1, begin2, pred, selector,
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
equal_split_tag());
default:
_GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(begin1, begin2);
return std::make_pair(__begin1, __begin2);
}
}
@ -81,80 +81,80 @@ template<typename RandomAccessIterator1,
/**
* @brief Parallel std::find, equal splitting variant.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence. Second sequence
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second sequence
* must have same length as first sequence.
* @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @param __pred Find predicate.
* @param __selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences.
*/
template<typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename Pred,
typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2,
Pred pred,
Selector selector,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2,
_Pred __pred,
_Selector __selector,
equal_split_tag)
{
_GLIBCXX_CALL(end1 - begin1)
_GLIBCXX_CALL(__end1 - __begin1)
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::value_type value_type;
typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType;
difference_type length = end1 - begin1;
difference_type result = length;
difference_type* borders;
_DifferenceType __length = __end1 - __begin1;
_DifferenceType __result = __length;
_DifferenceType* __borders;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
thread_index_t num_threads = get_max_threads();
# pragma omp parallel num_threads(num_threads)
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 1];
equally_split(length, num_threads, borders);
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 1];
equally_split(__length, __num_threads, __borders);
} //single
thread_index_t iam = omp_get_thread_num();
difference_type start = borders[iam], stop = borders[iam + 1];
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __borders[__iam], __stop = __borders[__iam + 1];
RandomAccessIterator1 i1 = begin1 + start;
RandomAccessIterator2 i2 = begin2 + start;
for (difference_type pos = start; pos < stop; ++pos)
_RAIter1 __i1 = __begin1 + __start;
_RAIter2 __i2 = __begin2 + __start;
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
{
#pragma omp flush(result)
#pragma omp flush(__result)
// Result has been set to something lower.
if (result < pos)
if (__result < __pos)
break;
if (selector(i1, i2, pred))
if (__selector(__i1, __i2, __pred))
{
omp_set_lock(&result_lock);
if (pos < result)
result = pos;
omp_unset_lock(&result_lock);
omp_set_lock(&__result_lock);
if (__pos < __result)
__result = __pos;
omp_unset_lock(&__result_lock);
break;
}
++i1;
++i2;
++__i1;
++__i2;
}
} //parallel
omp_destroy_lock(&result_lock);
delete[] borders;
omp_destroy_lock(&__result_lock);
delete[] __borders;
return
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
@ -163,12 +163,12 @@ template<typename RandomAccessIterator1,
/**
* @brief Parallel std::find, growing block size variant.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence. Second sequence
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second sequence
* must have same length as first sequence.
* @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @param __pred Find predicate.
* @param __selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_initial_block_size
@ -183,105 +183,105 @@ template<typename RandomAccessIterator1,
* for CSB, the blocks are allocated in a predetermined manner,
* namely spacial round-robin.
*/
template<typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename Pred,
typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred, Selector selector,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
growing_blocks_tag)
{
_GLIBCXX_CALL(end1 - begin1)
_GLIBCXX_CALL(__end1 - __begin1)
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::value_type value_type;
typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType;
const _Settings& __s = _Settings::get();
difference_type length = end1 - begin1;
_DifferenceType __length = __end1 - __begin1;
difference_type sequential_search_size =
std::min<difference_type>(length, __s.find_sequential_search_size);
_DifferenceType __sequential_search_size =
std::min<_DifferenceType>(__length, __s.find_sequential_search_size);
// Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
selector.sequential_algorithm(
begin1, begin1 + sequential_search_size, begin2, pred);
std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
if (find_seq_result.first != (begin1 + sequential_search_size))
return find_seq_result;
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result;
// Index of beginning of next free block (after sequential find).
difference_type next_block_start = sequential_search_size;
difference_type result = length;
_DifferenceType __next_block_start = __sequential_search_size;
_DifferenceType __result = __length;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
thread_index_t num_threads = get_max_threads();
# pragma omp parallel shared(result) num_threads(num_threads)
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel shared(__result) num_threads(__num_threads)
{
# pragma omp single
num_threads = omp_get_num_threads();
__num_threads = omp_get_num_threads();
// Not within first k elements -> start parallel.
thread_index_t iam = omp_get_thread_num();
// Not within first k elements -> start parallel.
_ThreadIndex __iam = omp_get_thread_num();
difference_type block_size = __s.find_initial_block_size;
difference_type start =
fetch_and_add<difference_type>(&next_block_start, block_size);
_DifferenceType __block_size = __s.find_initial_block_size;
_DifferenceType __start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
// Get new block, update pointer to next block.
difference_type stop =
std::min<difference_type>(length, start + block_size);
_DifferenceType __stop =
std::min<_DifferenceType>(__length, __start + __block_size);
std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
std::pair<_RAIter1, _RAIter2> __local_result;
while (start < length)
while (__start < __length)
{
# pragma omp flush(result)
# pragma omp flush(__result)
// Get new value of result.
if (result < start)
if (__result < __start)
{
// No chance to find first element.
break;
}
local_result = selector.sequential_algorithm(
begin1 + start, begin1 + stop, begin2 + start, pred);
if (local_result.first != (begin1 + stop))
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop, __begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{
omp_set_lock(&result_lock);
if ((local_result.first - begin1) < result)
omp_set_lock(&__result_lock);
if ((__local_result.first - __begin1) < __result)
{
result = local_result.first - begin1;
__result = __local_result.first - __begin1;
// Result cannot be in future blocks, stop algorithm.
fetch_and_add<difference_type>(&next_block_start, length);
__fetch_and_add<_DifferenceType>(&__next_block_start, __length);
}
omp_unset_lock(&result_lock);
omp_unset_lock(&__result_lock);
}
block_size =
std::min<difference_type>(block_size * __s.find_increasing_factor,
__block_size =
std::min<_DifferenceType>(__block_size * __s.find_increasing_factor,
__s.find_maximum_block_size);
// Get new block, update pointer to next block.
start =
fetch_and_add<difference_type>(&next_block_start, block_size);
stop = ((length < (start + block_size))
? length : (start + block_size));
__start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
__stop = ((__length < (__start + __block_size))
? __length : (__start + __block_size));
}
} //parallel
omp_destroy_lock(&result_lock);
omp_destroy_lock(&__result_lock);
// Return iterator on found element.
return
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
@ -290,12 +290,12 @@ template<typename RandomAccessIterator1,
/**
* @brief Parallel std::find, constant block size variant.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence. Second sequence
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second sequence
* must have same length as first sequence.
* @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @param __pred Find predicate.
* @param __selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_block_size
@ -306,94 +306,94 @@ template<typename RandomAccessIterator1,
* blocks are allocated in a predetermined manner, namely spacial
* round-robin.
*/
template<typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename Pred,
typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred, Selector selector,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
constant_size_blocks_tag)
{
_GLIBCXX_CALL(end1 - begin1)
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::value_type value_type;
_GLIBCXX_CALL(__end1 - __begin1)
typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType;
const _Settings& __s = _Settings::get();
difference_type length = end1 - begin1;
_DifferenceType __length = __end1 - __begin1;
difference_type sequential_search_size = std::min<difference_type>(
length, __s.find_sequential_search_size);
_DifferenceType __sequential_search_size = std::min<_DifferenceType>(
__length, __s.find_sequential_search_size);
// Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
selector.sequential_algorithm(begin1, begin1 + sequential_search_size,
begin2, pred);
std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(__begin1, __begin1 + __sequential_search_size,
__begin2, __pred);
if (find_seq_result.first != (begin1 + sequential_search_size))
return find_seq_result;
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result;
difference_type result = length;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
_DifferenceType __result = __length;
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
// Not within first sequential_search_size elements -> start parallel.
// Not within first __sequential_search_size elements -> start parallel.
thread_index_t num_threads = get_max_threads();
# pragma omp parallel shared(result) num_threads(num_threads)
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel shared(__result) num_threads(__num_threads)
{
# pragma omp single
num_threads = omp_get_num_threads();
__num_threads = omp_get_num_threads();
thread_index_t iam = omp_get_thread_num();
difference_type block_size = __s.find_initial_block_size;
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __block_size = __s.find_initial_block_size;
// First element of thread's current iteration.
difference_type iteration_start = sequential_search_size;
_DifferenceType __iteration_start = __sequential_search_size;
// Where to work (initialization).
difference_type start = iteration_start + iam * block_size;
difference_type stop =
std::min<difference_type>(length, start + block_size);
_DifferenceType __start = __iteration_start + __iam * __block_size;
_DifferenceType __stop =
std::min<_DifferenceType>(__length, __start + __block_size);
std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
std::pair<_RAIter1, _RAIter2> __local_result;
while (start < length)
while (__start < __length)
{
// Get new value of result.
# pragma omp flush(result)
# pragma omp flush(__result)
// No chance to find first element.
if (result < start)
if (__result < __start)
break;
local_result = selector.sequential_algorithm(
begin1 + start, begin1 + stop,
begin2 + start, pred);
if (local_result.first != (begin1 + stop))
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{
omp_set_lock(&result_lock);
if ((local_result.first - begin1) < result)
result = local_result.first - begin1;
omp_unset_lock(&result_lock);
omp_set_lock(&__result_lock);
if ((__local_result.first - __begin1) < __result)
__result = __local_result.first - __begin1;
omp_unset_lock(&__result_lock);
// Will not find better value in its interval.
break;
}
iteration_start += num_threads * block_size;
__iteration_start += __num_threads * __block_size;
// Where to work.
start = iteration_start + iam * block_size;
stop = std::min<difference_type>(length, start + block_size);
__start = __iteration_start + __iam * __block_size;
__stop = std::min<_DifferenceType>(__length, __start + __block_size);
}
} //parallel
omp_destroy_lock(&result_lock);
omp_destroy_lock(&__result_lock);
// Return iterator on found element.
return
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
} // end namespace

View File

@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/find_selectors.h
* @brief Function objects representing different tasks to be plugged
* @brief Function objects representing different tasks to be plugged
* into the parallel find algorithm.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
@ -39,153 +39,153 @@
namespace __gnu_parallel
{
/** @brief Base class of all __gnu_parallel::find_template selectors. */
struct generic_find_selector
/** @brief Base class of all __gnu_parallel::__find_template selectors. */
struct __generic_find_selector
{ };
/**
* @brief Test predicate on a single element, used for std::find()
* and std::find_if ().
*/
struct find_if_selector : public generic_find_selector
struct __find_if_selector : public __generic_find_selector
{
/** @brief Test on one position.
* @param i1 Iterator on first sequence.
* @param i2 Iterator on second sequence (unused).
* @param pred Find predicate.
/** @brief Test on one position.
* @param __i1 Iterator on first sequence.
* @param __i2 Iterator on second sequence (unused).
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
bool
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
{ return pred(*i1); }
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return __pred(*__i1); }
/** @brief Corresponding sequential algorithm on a sequence.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param pred Find predicate.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
sequential_algorithm(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred)
{ return std::make_pair(find_if(begin1, end1, pred,
sequential_tag()), begin2); }
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_if(__begin1, __end1, __pred,
sequential_tag()), __begin2); }
};
/** @brief Test predicate on two adjacent elements. */
struct adjacent_find_selector : public generic_find_selector
/** @brief Test predicate on two adjacent elements. */
struct __adjacent_find_selector : public __generic_find_selector
{
/** @brief Test on one position.
* @param i1 Iterator on first sequence.
* @param i2 Iterator on second sequence (unused).
* @param pred Find predicate.
/** @brief Test on one position.
* @param __i1 Iterator on first sequence.
* @param __i2 Iterator on second sequence (unused).
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
bool
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{
// Passed end iterator is one short.
return pred(*i1, *(i1 + 1));
return __pred(*__i1, *(__i1 + 1));
}
/** @brief Corresponding sequential algorithm on a sequence.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param pred Find predicate.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
sequential_algorithm(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred)
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{
// Passed end iterator is one short.
RandomAccessIterator1 spot = adjacent_find(begin1, end1 + 1,
pred, sequential_tag());
if (spot == (end1 + 1))
spot = end1;
return std::make_pair(spot, begin2);
_RAIter1 spot = adjacent_find(__begin1, __end1 + 1,
__pred, sequential_tag());
if (spot == (__end1 + 1))
spot = __end1;
return std::make_pair(spot, __begin2);
}
};
/** @brief Test inverted predicate on a single element. */
struct mismatch_selector : public generic_find_selector
struct __mismatch_selector : public __generic_find_selector
{
/**
* @brief Test on one position.
* @param i1 Iterator on first sequence.
* @param i2 Iterator on second sequence (unused).
* @param pred Find predicate.
* @brief Test on one position.
* @param __i1 Iterator on first sequence.
* @param __i2 Iterator on second sequence (unused).
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
bool
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
{ return !pred(*i1, *i2); }
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return !__pred(*__i1, *__i2); }
/**
* @brief Corresponding sequential algorithm on a sequence.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param pred Find predicate.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
sequential_algorithm(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred)
{ return mismatch(begin1, end1, begin2, pred, sequential_tag()); }
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); }
};
/** @brief Test predicate on several elements. */
template<typename ForwardIterator>
struct find_first_of_selector : public generic_find_selector
/** @brief Test predicate on several elements. */
template<typename _ForwardIterator>
struct __find_first_of_selector : public __generic_find_selector
{
ForwardIterator begin;
ForwardIterator end;
_ForwardIterator __begin;
_ForwardIterator __end;
explicit find_first_of_selector(ForwardIterator begin, ForwardIterator end)
: begin(begin), end(end) { }
explicit __find_first_of_selector(_ForwardIterator __begin, _ForwardIterator __end)
: __begin(__begin), __end(__end) { }
/** @brief Test on one position.
* @param i1 Iterator on first sequence.
* @param i2 Iterator on second sequence (unused).
* @param pred Find predicate. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
/** @brief Test on one position.
* @param __i1 Iterator on first sequence.
* @param __i2 Iterator on second sequence (unused).
* @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
bool
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{
for (ForwardIterator pos_in_candidates = begin;
pos_in_candidates != end; ++pos_in_candidates)
if (pred(*i1, *pos_in_candidates))
for (_ForwardIterator __pos_in_candidates = __begin;
__pos_in_candidates != __end; ++__pos_in_candidates)
if (__pred(*__i1, *__pos_in_candidates))
return true;
return false;
}
/** @brief Corresponding sequential algorithm on a sequence.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param pred Find predicate. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename Pred>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
sequential_algorithm(RandomAccessIterator1 begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred)
{ return std::make_pair(find_first_of(begin1, end1, begin, end, pred,
sequential_tag()), begin2); }
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_first_of(__begin1, __end1, __begin, __end, __pred,
sequential_tag()), __begin2); }
};
}

View File

@ -42,55 +42,55 @@
namespace __gnu_parallel
{
/** @brief Chose the desired algorithm by evaluating @c parallelism_tag.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param user_op A user-specified functor (comparator, predicate,
/** @brief Choose the desired algorithm by evaluating @c __parallelism_tag.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __user_op A user-specified functor (comparator, predicate,
* associative operator,...)
* @param functionality functor to "process" an element with
* user_op (depends on desired functionality, e. g. accumulate,
* @param __functionality functor to "process" an element with
* __user_op (depends on desired functionality, e. g. accumulate,
* for_each,...
* @param reduction Reduction functor.
* @param reduction_start Initial value for reduction.
* @param output Output iterator.
* @param bound Maximum number of elements processed.
* @param parallelism_tag Parallelization method */
template<typename InputIterator, typename UserOp,
typename Functionality, typename Red, typename Result>
UserOp
for_each_template_random_access(InputIterator begin, InputIterator end,
UserOp user_op,
Functionality& functionality,
Red reduction, Result reduction_start,
Result& output, typename
std::iterator_traits<InputIterator>::
difference_type bound,
_Parallelism parallelism_tag)
* @param __reduction Reduction functor.
* @param __reduction_start Initial value for reduction.
* @param __output Output iterator.
* @param __bound Maximum number of elements processed.
* @param __parallelism_tag Parallelization method */
template<typename _IIter, typename _UserOp,
typename _Functionality, typename _Red, typename _Result>
_UserOp
__for_each_template_random_access(_IIter __begin, _IIter __end,
_UserOp __user_op,
_Functionality& __functionality,
_Red __reduction, _Result __reduction_start,
_Result& __output, typename
std::iterator_traits<_IIter>::
difference_type __bound,
_Parallelism __parallelism_tag)
{
if (parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(begin, end, user_op,
functionality, reduction,
reduction_start,
output, bound);
else if (parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(begin, end, user_op,
functionality,
reduction,
reduction_start,
output, bound);
else if (parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(begin, end, user_op,
functionality,
reduction,
reduction_start,
output, bound);
if (__parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else //e. g. parallel_balanced
return for_each_template_random_access_workstealing(begin, end,
user_op,
functionality,
reduction,
reduction_start,
output, bound);
return for_each_template_random_access_workstealing(__begin, __end,
__user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
}
}

View File

@ -38,192 +38,192 @@
namespace __gnu_parallel
{
/** @brief Generic selector for embarrassingly parallel functions. */
template<typename It>
struct generic_for_each_selector
/** @brief Generic __selector for embarrassingly parallel functions. */
template<typename _It>
struct __generic_for_each_selector
{
/** @brief Iterator on last element processed; needed for some
/** @brief _Iterator on last element processed; needed for some
* algorithms (e. g. std::transform()).
*/
It finish_iterator;
_It finish_iterator;
};
/** @brief std::for_each() selector. */
template<typename It>
struct for_each_selector : public generic_for_each_selector<It>
template<typename _It>
struct __for_each_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object. */
template<typename Op>
* @param __o Operator.
* @param __i iterator referencing object. */
template<typename _Op>
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
o(*i);
__o(*__i);
return true;
}
};
/** @brief std::generate() selector. */
template<typename It>
struct generate_selector : public generic_for_each_selector<It>
template<typename _It>
struct __generate_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object. */
template<typename Op>
* @param __o Operator.
* @param __i iterator referencing object. */
template<typename _Op>
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
*i = o();
*__i = __o();
return true;
}
};
/** @brief std::fill() selector. */
template<typename It>
struct fill_selector : public generic_for_each_selector<It>
template<typename _It>
struct __fill_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param v Current value.
* @param i Iterator referencing object. */
* @param __v Current value.
* @param __i iterator referencing object. */
template<typename Val>
bool
operator()(Val& v, It i)
operator()(Val& __v, _It __i)
{
*i = v;
*__i = __v;
return true;
}
};
/** @brief std::transform() selector, one input sequence variant. */
template<typename It>
struct transform1_selector : public generic_for_each_selector<It>
/** @brief std::transform() __selector, one input sequence variant. */
template<typename _It>
struct __transform1_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object. */
template<typename Op>
* @param __o Operator.
* @param __i iterator referencing object. */
template<typename _Op>
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
*i.second = o(*i.first);
*__i.second = __o(*__i.first);
return true;
}
};
/** @brief std::transform() selector, two input sequences variant. */
template<typename It>
struct transform2_selector : public generic_for_each_selector<It>
/** @brief std::transform() __selector, two input sequences variant. */
template<typename _It>
struct __transform2_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object. */
template<typename Op>
* @param __o Operator.
* @param __i iterator referencing object. */
template<typename _Op>
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
*i.third = o(*i.first, *i.second);
*__i.__third = __o(*__i.__first, *__i.__second);
return true;
}
};
/** @brief std::replace() selector. */
template<typename It, typename T>
struct replace_selector : public generic_for_each_selector<It>
template<typename _It, typename _Tp>
struct __replace_selector : public __generic_for_each_selector<_It>
{
/** @brief Value to replace with. */
const T& new_val;
const _Tp& __new_val;
/** @brief Constructor
* @param new_val Value to replace with. */
* @param __new_val Value to replace with. */
explicit
replace_selector(const T &new_val) : new_val(new_val) {}
__replace_selector(const _Tp &__new_val) : __new_val(__new_val) {}
/** @brief Functor execution.
* @param v Current value.
* @param i Iterator referencing object. */
* @param __v Current value.
* @param __i iterator referencing object. */
bool
operator()(T& v, It i)
operator()(_Tp& __v, _It __i)
{
if (*i == v)
*i = new_val;
if (*__i == __v)
*__i = __new_val;
return true;
}
};
/** @brief std::replace() selector. */
template<typename It, typename Op, typename T>
struct replace_if_selector : public generic_for_each_selector<It>
template<typename _It, typename _Op, typename _Tp>
struct __replace_if_selector : public __generic_for_each_selector<_It>
{
/** @brief Value to replace with. */
const T& new_val;
const _Tp& __new_val;
/** @brief Constructor.
* @param new_val Value to replace with. */
* @param __new_val Value to replace with. */
explicit
replace_if_selector(const T &new_val) : new_val(new_val) { }
__replace_if_selector(const _Tp &__new_val) : __new_val(__new_val) { }
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object. */
* @param __o Operator.
* @param __i iterator referencing object. */
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
if (o(*i))
*i = new_val;
if (__o(*__i))
*__i = __new_val;
return true;
}
};
/** @brief std::count() selector. */
template<typename It, typename Diff>
struct count_selector : public generic_for_each_selector<It>
template<typename _It, typename _Diff>
struct __count_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param v Current value.
* @param i Iterator referencing object.
* @param __v Current value.
* @param __i iterator referencing object.
* @return 1 if count, 0 if does not count. */
template<typename Val>
Diff
operator()(Val& v, It i)
{ return (v == *i) ? 1 : 0; }
_Diff
operator()(Val& __v, _It __i)
{ return (__v == *__i) ? 1 : 0; }
};
/** @brief std::count_if () selector. */
template<typename It, typename Diff>
struct count_if_selector : public generic_for_each_selector<It>
template<typename _It, typename _Diff>
struct __count_if_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator.
* @param i Iterator referencing object.
* @param __o Operator.
* @param __i iterator referencing object.
* @return 1 if count, 0 if does not count. */
template<typename Op>
Diff
operator()(Op& o, It i)
{ return (o(*i)) ? 1 : 0; }
template<typename _Op>
_Diff
operator()(_Op& __o, _It __i)
{ return (__o(*__i)) ? 1 : 0; }
};
/** @brief std::accumulate() selector. */
template<typename It>
struct accumulate_selector : public generic_for_each_selector<It>
template<typename _It>
struct __accumulate_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator (unused).
* @param i Iterator referencing object.
* @param __o Operator (unused).
* @param __i iterator referencing object.
* @return The current value. */
template<typename Op>
typename std::iterator_traits<It>::value_type operator()(Op o, It i)
{ return *i; }
template<typename _Op>
typename std::iterator_traits<_It>::value_type operator()(_Op __o, _It __i)
{ return *__i; }
};
/** @brief std::inner_product() selector. */
template<typename It, typename It2, typename T>
struct inner_product_selector : public generic_for_each_selector<It>
template<typename _It, typename It2, typename _Tp>
struct __inner_product_selector : public __generic_for_each_selector<_It>
{
/** @brief Begin iterator of first sequence. */
It begin1_iterator;
_It __begin1_iterator;
/** @brief Begin iterator of second sequence. */
It2 begin2_iterator;
@ -232,50 +232,50 @@ namespace __gnu_parallel
* @param b1 Begin iterator of first sequence.
* @param b2 Begin iterator of second sequence. */
explicit
inner_product_selector(It b1, It2 b2)
: begin1_iterator(b1), begin2_iterator(b2) { }
__inner_product_selector(_It b1, It2 b2)
: __begin1_iterator(b1), begin2_iterator(b2) { }
/** @brief Functor execution.
* @param mult Multiplication functor.
* @param current Iterator referencing object.
* @return Inner product elemental result. */
template<typename Op>
T
operator()(Op mult, It current)
* @param __mult Multiplication functor.
* @param __current iterator referencing object.
* @return Inner product elemental __result. */
template<typename _Op>
_Tp
operator()(_Op __mult, _It __current)
{
typename std::iterator_traits<It>::difference_type position
= current - begin1_iterator;
return mult(*current, *(begin2_iterator + position));
typename std::iterator_traits<_It>::difference_type __position
= __current - __begin1_iterator;
return __mult(*__current, *(begin2_iterator + __position));
}
};
/** @brief Selector that just returns the passed iterator. */
template<typename It>
struct identity_selector : public generic_for_each_selector<It>
template<typename _It>
struct __identity_selector : public __generic_for_each_selector<_It>
{
/** @brief Functor execution.
* @param o Operator (unused).
* @param i Iterator referencing object.
* @param __o Operator (unused).
* @param __i iterator referencing object.
* @return Passed iterator. */
template<typename Op>
It
operator()(Op o, It i)
{ return i; }
template<typename _Op>
_It
operator()(_Op __o, _It __i)
{ return __i; }
};
/** @brief Selector that returns the difference between two adjacent
* elements.
* __elements.
*/
template<typename It>
struct adjacent_difference_selector : public generic_for_each_selector<It>
template<typename _It>
struct __adjacent_difference_selector : public __generic_for_each_selector<_It>
{
template<typename Op>
template<typename _Op>
bool
operator()(Op& o, It i)
operator()(_Op& __o, _It __i)
{
typename It::first_type go_back_one = i.first;
--go_back_one;
*i.second = o(*i.first, *go_back_one);
typename _It::first_type __go_back_one = __i.first;
--__go_back_one;
*__i.__second = __o(*__i.__first, *__go_back_one);
return true;
}
};
@ -283,77 +283,77 @@ namespace __gnu_parallel
// XXX move into type_traits?
/** @brief Functor doing nothing
*
* For some reduction tasks (this is not a function object, but is
* passed as selector dummy parameter.
* For some __reduction tasks (this is not a function object, but is
* passed as __selector __dummy parameter.
*/
struct nothing
struct _Nothing
{
/** @brief Functor execution.
* @param i Iterator referencing object. */
template<typename It>
* @param __i iterator referencing object. */
template<typename _It>
void
operator()(It i) { }
operator()(_It __i) { }
};
/** @brief Reduction function doing nothing. */
struct dummy_reduct
struct _DummyReduct
{
bool
operator()(bool /*x*/, bool /*y*/) const
operator()(bool /*__x*/, bool /*__y*/) const
{ return true; }
};
/** @brief Reduction for finding the maximum element, using a comparator. */
template<typename Comp, typename It>
struct min_element_reduct
template<typename _Compare, typename _It>
struct __min_element_reduct
{
Comp& comp;
_Compare& __comp;
explicit
min_element_reduct(Comp &c) : comp(c) { }
__min_element_reduct(_Compare &__c) : __comp(__c) { }
It
operator()(It x, It y)
_It
operator()(_It __x, _It __y)
{
if (comp(*x, *y))
return x;
if (__comp(*__x, *__y))
return __x;
else
return y;
return __y;
}
};
/** @brief Reduction for finding the maximum element, using a comparator. */
template<typename Comp, typename It>
struct max_element_reduct
template<typename _Compare, typename _It>
struct __max_element_reduct
{
Comp& comp;
_Compare& __comp;
explicit
max_element_reduct(Comp& c) : comp(c) { }
__max_element_reduct(_Compare& __c) : __comp(__c) { }
It
operator()(It x, It y)
_It
operator()(_It __x, _It __y)
{
if (comp(*x, *y))
return y;
if (__comp(*__x, *__y))
return __y;
else
return x;
return __x;
}
};
/** @brief General reduction, using a binary operator. */
template<typename BinOp>
struct accumulate_binop_reduct
template<typename _BinOp>
struct __accumulate_binop_reduct
{
BinOp& binop;
_BinOp& __binop;
explicit
accumulate_binop_reduct(BinOp& b) : binop(b) { }
__accumulate_binop_reduct(_BinOp& __b) : __binop(__b) { }
template<typename Result, typename Addend>
Result
operator()(const Result& x, const Addend& y)
{ return binop(x, y); }
template<typename _Result, typename _Addend>
_Result
operator()(const _Result& __x, const _Addend& __y)
{ return __binop(__x, __y); }
};
}

View File

@ -40,160 +40,160 @@ namespace __gnu_parallel
/** @brief A pair of iterators. The usual iterator operations are
* applied to both child iterators.
*/
template<typename Iterator1, typename Iterator2, typename IteratorCategory>
class iterator_pair : public std::pair<Iterator1, Iterator2>
template<typename _Iterator1, typename _Iterator2, typename _IteratorCategory>
class _IteratorPair : public std::pair<_Iterator1, _Iterator2>
{
private:
typedef iterator_pair<Iterator1, Iterator2, IteratorCategory> type;
typedef std::pair<Iterator1, Iterator2> base_type;
typedef _IteratorPair<_Iterator1, _Iterator2, _IteratorCategory> _Self;
typedef std::pair<_Iterator1, _Iterator2> _Base;
public:
typedef IteratorCategory iterator_category;
typedef _IteratorCategory iterator_category;
typedef void value_type;
typedef std::iterator_traits<Iterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef type* pointer;
typedef type& reference;
typedef std::iterator_traits<_Iterator1> _TraitsType;
typedef typename _TraitsType::difference_type difference_type;
typedef _Self* pointer;
typedef _Self& reference;
iterator_pair() { }
_IteratorPair() { }
iterator_pair(const Iterator1& first, const Iterator2& second)
: base_type(first, second) { }
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
: _Base(__first, __second) { }
// Pre-increment operator.
type&
_Self&
operator++()
{
++base_type::first;
++base_type::second;
++_Base::first;
++_Base::second;
return *this;
}
// Post-increment operator.
const type
const _Self
operator++(int)
{ return type(base_type::first++, base_type::second++); }
{ return _Self(_Base::first++, _Base::second++); }
// Pre-decrement operator.
type&
_Self&
operator--()
{
--base_type::first;
--base_type::second;
--_Base::first;
--_Base::second;
return *this;
}
// Post-decrement operator.
const type
const _Self
operator--(int)
{ return type(base_type::first--, base_type::second--); }
{ return _Self(_Base::first--, _Base::second--); }
// Type conversion.
operator Iterator2() const
{ return base_type::second; }
operator _Iterator2() const
{ return _Base::second; }
type&
operator=(const type& other)
_Self&
operator=(const _Self& __other)
{
base_type::first = other.first;
base_type::second = other.second;
_Base::first = __other.first;
_Base::second = __other.second;
return *this;
}
type
operator+(difference_type delta) const
{ return type(base_type::first + delta, base_type::second + delta); }
_Self
operator+(difference_type __delta) const
{ return _Self(_Base::first + __delta, _Base::second + __delta); }
difference_type
operator-(const type& other) const
{ return base_type::first - other.first; }
operator-(const _Self& __other) const
{ return _Base::first - __other.first; }
};
/** @brief A triple of iterators. The usual iterator operations are
applied to all three child iterators.
*/
template<typename Iterator1, typename Iterator2, typename Iterator3,
typename IteratorCategory>
class iterator_triple
template<typename _Iterator1, typename _Iterator2, typename _Iterator3,
typename _IteratorCategory>
class _IteratorTriple
{
private:
typedef iterator_triple<Iterator1, Iterator2, Iterator3,
IteratorCategory> type;
typedef _IteratorTriple<_Iterator1, _Iterator2, _Iterator3,
_IteratorCategory> _Self;
public:
typedef IteratorCategory iterator_category;
typedef _IteratorCategory iterator_category;
typedef void value_type;
typedef typename std::iterator_traits<Iterator1>::difference_type
typedef typename std::iterator_traits<_Iterator1>::difference_type
difference_type;
typedef type* pointer;
typedef type& reference;
typedef _Self* pointer;
typedef _Self& reference;
Iterator1 first;
Iterator2 second;
Iterator3 third;
_Iterator1 __first;
_Iterator2 __second;
_Iterator3 __third;
iterator_triple() { }
_IteratorTriple() { }
iterator_triple(const Iterator1& _first, const Iterator2& _second,
const Iterator3& _third)
_IteratorTriple(const _Iterator1& _first, const _Iterator2& _second,
const _Iterator3& _third)
{
first = _first;
second = _second;
third = _third;
__first = _first;
__second = _second;
__third = _third;
}
// Pre-increment operator.
type&
_Self&
operator++()
{
++first;
++second;
++third;
++__first;
++__second;
++__third;
return *this;
}
// Post-increment operator.
const type
const _Self
operator++(int)
{ return type(first++, second++, third++); }
{ return _Self(__first++, __second++, __third++); }
// Pre-decrement operator.
type&
_Self&
operator--()
{
--first;
--second;
--third;
--__first;
--__second;
--__third;
return *this;
}
// Post-decrement operator.
const type
const _Self
operator--(int)
{ return type(first--, second--, third--); }
{ return _Self(__first--, __second--, __third--); }
// Type conversion.
operator Iterator3() const
{ return third; }
operator _Iterator3() const
{ return __third; }
type&
operator=(const type& other)
_Self&
operator=(const _Self& __other)
{
first = other.first;
second = other.second;
third = other.third;
__first = __other.__first;
__second = __other.__second;
__third = __other.__third;
return *this;
}
type
operator+(difference_type delta) const
{ return type(first + delta, second + delta, third + delta); }
_Self
operator+(difference_type __delta) const
{ return _Self(__first + __delta, __second + __delta, __third + __delta); }
difference_type
operator-(const type& other) const
{ return first - other.first; }
operator-(const _Self& __other) const
{ return __first - __other.__first; }
};
}

View File

@ -3,12 +3,12 @@
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// software; you can redistribute __it and/or modify __it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// This library is distributed in the hope that __it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/list_partition.h
* @brief Functionality to split sequence referenced by only input
* @brief _Functionality to split __sequence referenced by only input
* iterators.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
@ -39,137 +39,137 @@
namespace __gnu_parallel
{
/** @brief Shrinks and doubles the ranges.
* @param os_starts Start positions worked on (oversampled).
* @param count_to_two Counts up to 2.
* @param range_length Current length of a chunk.
* @param make_twice Whether the @c os_starts is allowed to be
* @param __os_starts Start positions worked on (oversampled).
* @param __count_to_two Counts up to 2.
* @param __range_length Current length of a chunk.
* @param __make_twice Whether the @__c __os_starts is allowed to be
* grown or not
*/
template<typename InputIterator>
template<typename _IIter>
void
shrink_and_double(std::vector<InputIterator>& os_starts,
size_t& count_to_two, size_t& range_length,
const bool make_twice)
__shrink_and_double(std::vector<_IIter>& __os_starts,
size_t& __count_to_two, size_t& __range_length,
const bool __make_twice)
{
++count_to_two;
if (not make_twice or count_to_two < 2)
shrink(os_starts, count_to_two, range_length);
++__count_to_two;
if (not __make_twice or __count_to_two < 2)
__shrink(__os_starts, __count_to_two, __range_length);
else
{
os_starts.resize((os_starts.size() - 1) * 2 + 1);
count_to_two = 0;
__os_starts.resize((__os_starts.size() - 1) * 2 + 1);
__count_to_two = 0;
}
}
/** @brief Combines two ranges into one and thus halves the number of ranges.
* @param os_starts Start positions worked on (oversampled).
* @param count_to_two Counts up to 2.
* @param range_length Current length of a chunk. */
template<typename InputIterator>
* @param __os_starts Start positions worked on (oversampled).
* @param __count_to_two Counts up to 2.
* @param __range_length Current length of a chunk. */
template<typename _IIter>
void
shrink(std::vector<InputIterator>& os_starts, size_t& count_to_two,
size_t& range_length)
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
size_t& __range_length)
{
for (typename std::vector<InputIterator>::size_type i = 0;
i <= (os_starts.size() / 2); ++i)
os_starts[i] = os_starts[i * 2];
range_length *= 2;
for (typename std::vector<_IIter>::size_type __i = 0;
__i <= (__os_starts.size() / 2); ++__i)
__os_starts[__i] = __os_starts[__i * 2];
__range_length *= 2;
}
/** @brief Splits a sequence given by input iterators into parts of
* almost equal size
*
* The function needs only one pass over the sequence.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param starts Start iterators for the resulting parts, dimension
* @c num_parts+1. For convenience, @c starts @c [num_parts]
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __starts Start iterators for the resulting parts, dimension
* @__c __num_parts+1. For convenience, @__c __starts @__c [__num_parts]
* contains the end iterator of the sequence.
* @param lengths Length of the resulting parts.
* @param num_parts Number of parts to split the sequence into.
* @param f Functor to be applied to each element by traversing it
* @param oversampling Oversampling factor. If 0, then the
* partitions will differ in at most @f$ \sqrt{\mathrm{end} -
* \mathrm{begin}} @f$ elements. Otherwise, the ratio between the
* longest and the shortest part is bounded by @f$
* 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$.
* @param __lengths Length of the resulting parts.
* @param __num_parts Number of parts to split the sequence into.
* @param __f Functor to be applied to each element by traversing __it
* @param __oversampling Oversampling factor. If 0, then the
* partitions will differ in at most @__f$ \sqrt{\mathrm{__end} -
* \mathrm{__begin}} @__f$ __elements. Otherwise, the ratio between the
* longest and the shortest part is bounded by @__f$
* 1/(\mathrm{__oversampling} \cdot \mathrm{num\_parts}) @__f$.
* @return Length of the whole sequence.
*/
template<typename InputIterator, typename FunctorType>
template<typename _IIter, typename _FunctorType>
size_t
list_partition(const InputIterator begin, const InputIterator end,
InputIterator* starts, size_t* lengths, const int num_parts,
FunctorType& f, int oversampling = 0)
list_partition(const _IIter __begin, const _IIter __end,
_IIter* __starts, size_t* __lengths, const int __num_parts,
_FunctorType& __f, int __oversampling = 0)
{
bool make_twice = false;
bool __make_twice = false;
// The resizing algorithm is chosen according to the oversampling factor.
if (oversampling == 0)
if (__oversampling == 0)
{
make_twice = true;
oversampling = 1;
__make_twice = true;
__oversampling = 1;
}
std::vector<InputIterator> os_starts(2 * oversampling * num_parts + 1);
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
os_starts[0]= begin;
InputIterator prev = begin, it = begin;
size_t dist_limit = 0, dist = 0;
size_t cur = 1, next = 1;
size_t range_length = 1;
size_t count_to_two = 0;
while (it != end)
__os_starts[0]= __begin;
_IIter __prev = __begin, __it = __begin;
size_t __dist_limit = 0, __dist = 0;
size_t __cur = 1, __next = 1;
size_t __range_length = 1;
size_t __count_to_two = 0;
while (__it != __end)
{
cur = next;
for (; cur < os_starts.size() and it != end; ++cur)
__cur = __next;
for (; __cur < __os_starts.size() and __it != __end; ++__cur)
{
for (dist_limit += range_length;
dist < dist_limit and it != end; ++dist)
for (__dist_limit += __range_length;
__dist < __dist_limit and __it != __end; ++__dist)
{
f(it);
++it;
__f(__it);
++__it;
}
os_starts[cur] = it;
__os_starts[__cur] = __it;
}
// Must compare for end and not cur < os_starts.size() , because
// cur could be == os_starts.size() as well
if (it == end)
// Must compare for end and not __cur < __os_starts.size() , because
// __cur could be == __os_starts.size() as well
if (__it == __end)
break;
shrink_and_double(os_starts, count_to_two, range_length, make_twice);
next = os_starts.size() / 2 + 1;
__shrink_and_double(__os_starts, __count_to_two, __range_length, __make_twice);
__next = __os_starts.size() / 2 + 1;
}
// Calculation of the parts (one must be extracted from current
// because the partition beginning at end, consists only of
// Calculation of the parts (one must be extracted from __current
// because the partition beginning at __end, consists only of
// itself).
size_t size_part = (cur - 1) / num_parts;
int size_greater = static_cast<int>((cur - 1) % num_parts);
starts[0] = os_starts[0];
size_t __size_part = (__cur - 1) / __num_parts;
int __size_greater = static_cast<int>((__cur - 1) % __num_parts);
__starts[0] = __os_starts[0];
size_t index = 0;
size_t __index = 0;
// Smallest partitions.
for (int i = 1; i < (num_parts + 1 - size_greater); ++i)
for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i)
{
lengths[i - 1] = size_part * range_length;
index += size_part;
starts[i] = os_starts[index];
__lengths[__i - 1] = __size_part * __range_length;
__index += __size_part;
__starts[__i] = __os_starts[__index];
}
// Biggest partitions.
for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i)
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; ++__i)
{
lengths[i - 1] = (size_part+1) * range_length;
index += (size_part+1);
starts[i] = os_starts[index];
__lengths[__i - 1] = (__size_part+1) * __range_length;
__index += (__size_part+1);
__starts[__i] = __os_starts[__index];
}
// Correction of the end size (the end iteration has not finished).
lengths[num_parts - 1] -= (dist_limit - dist);
__lengths[__num_parts - 1] -= (__dist_limit - __dist);
return dist;
return __dist;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -37,224 +37,224 @@
namespace __gnu_parallel
{
/** @brief Merge routine being able to merge only the @c max_length
/** @brief Merge routine being able to merge only the @__c __max_length
* smallest elements.
*
* The @c begin iterators are advanced accordingly, they might not
* reach @c end, in contrast to the usual variant.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param target Target begin iterator.
* @param max_length Maximum number of elements to merge.
* @param comp Comparator.
* The @__c __begin iterators are advanced accordingly, they might not
* reach @__c __end, in contrast to the usual variant.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __target Target begin iterator.
* @param __max_length Maximum number of elements to merge.
* @param __comp Comparator.
* @return Output end iterator. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename OutputIterator, typename _DifferenceTp,
typename Comparator>
OutputIterator
merge_advance_usual(RandomAccessIterator1& begin1,
RandomAccessIterator1 end1,
RandomAccessIterator2& begin2,
RandomAccessIterator2 end2, OutputIterator target,
_DifferenceTp max_length, Comparator comp)
template<typename _RAIter1, typename _RAIter2,
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
_OutputIterator
__merge_advance_usual(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2, _OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{
typedef _DifferenceTp difference_type;
while (begin1 != end1 && begin2 != end2 && max_length > 0)
typedef _DifferenceTp _DifferenceType;
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
{
// array1[i1] < array0[i0]
if (comp(*begin2, *begin1))
*target++ = *begin2++;
// array1[__i1] < array0[i0]
if (__comp(*__begin2, *__begin1))
*__target++ = *__begin2++;
else
*target++ = *begin1++;
--max_length;
*__target++ = *__begin1++;
--__max_length;
}
if (begin1 != end1)
if (__begin1 != __end1)
{
target = std::copy(begin1, begin1 + max_length, target);
begin1 += max_length;
__target = std::copy(__begin1, __begin1 + __max_length, __target);
__begin1 += __max_length;
}
else
{
target = std::copy(begin2, begin2 + max_length, target);
begin2 += max_length;
__target = std::copy(__begin2, __begin2 + __max_length, __target);
__begin2 += __max_length;
}
return target;
return __target;
}
/** @brief Merge routine being able to merge only the @c max_length
/** @brief Merge routine being able to merge only the @__c __max_length
* smallest elements.
*
* The @c begin iterators are advanced accordingly, they might not
* reach @c end, in contrast to the usual variant.
* The @__c __begin iterators are advanced accordingly, they might not
* reach @__c __end, in contrast to the usual variant.
* Specially designed code should allow the compiler to generate
* conditional moves instead of branches.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param target Target begin iterator.
* @param max_length Maximum number of elements to merge.
* @param comp Comparator.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __target Target begin iterator.
* @param __max_length Maximum number of elements to merge.
* @param __comp Comparator.
* @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
         typename _OutputIterator, typename _DifferenceTp,
         typename _Compare>
  _OutputIterator
  __merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
                       _RAIter2& __begin2, _RAIter2 __end2,
                       _OutputIterator __target,
                       _DifferenceTp __max_length, _Compare __comp)
  {
    typedef _DifferenceTp _DifferenceType;
    typedef typename std::iterator_traits<_RAIter1>::value_type
      _ValueType1;
    typedef typename std::iterator_traits<_RAIter2>::value_type
      _ValueType2;

#if _GLIBCXX_ASSERTIONS
    _GLIBCXX_PARALLEL_ASSERT(__max_length >= 0);
#endif

    while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
      {
        // Precompute the successor positions; copying the current
        // elements into locals lets the compiler emit conditional
        // moves instead of branches.
        _RAIter1 __next1 = __begin1 + 1;
        _RAIter2 __next2 = __begin2 + 1;
        _ValueType1 __element1 = *__begin1;
        _ValueType2 __element2 = *__begin2;

        if (__comp(__element2, __element1))
          {
            // Second sequence has the smaller element (stable: only
            // strictly smaller wins over the first sequence).
            __element1 = __element2;
            __begin2 = __next2;
          }
        else
          __begin1 = __next1;

        *__target = __element1;

        ++__target;
        --__max_length;
      }
    // One sequence is exhausted (or the quota is reached); copy the
    // remaining quota from the non-exhausted sequence.  The caller
    // guarantees enough input elements for __max_length outputs.
    if (__begin1 != __end1)
      {
        __target = std::copy(__begin1, __begin1 + __max_length, __target);
        __begin1 += __max_length;
      }
    else
      {
        __target = std::copy(__begin2, __begin2 + __max_length, __target);
        __begin2 += __max_length;
      }
    return __target;
  }
/** @brief Merge routine being able to merge only the @c max_length
/** @brief Merge routine being able to merge only the @__c __max_length
* smallest elements.
*
* The @c begin iterators are advanced accordingly, they might not
* reach @c end, in contrast to the usual variant.
* The @__c __begin iterators are advanced accordingly, they might not
* reach @__c __end, in contrast to the usual variant.
* Static switch on whether to use the conditional-move variant.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param target Target begin iterator.
* @param max_length Maximum number of elements to merge.
* @param comp Comparator.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __target Target begin iterator.
* @param __max_length Maximum number of elements to merge.
* @param __comp Comparator.
* @return Output end iterator. */
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename OutputIterator, typename _DifferenceTp,
typename Comparator>
inline OutputIterator
merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1,
RandomAccessIterator2& begin2, RandomAccessIterator2 end2,
OutputIterator target, _DifferenceTp max_length,
Comparator comp)
template<typename _RAIter1, typename _RAIter2,
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
inline _OutputIterator
__merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2, _RAIter2 __end2,
_OutputIterator __target, _DifferenceTp __max_length,
_Compare __comp)
{
_GLIBCXX_CALL(max_length)
_GLIBCXX_CALL(__max_length)
return merge_advance_movc(begin1, end1, begin2, end2, target,
max_length, comp);
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target,
__max_length, __comp);
}
/** @brief Merge routine fallback to sequential in case the
 * iterators of the two input sequences are of different type.
 * @param __begin1 Begin iterator of first sequence.
 * @param __end1 End iterator of first sequence.
 * @param __begin2 Begin iterator of second sequence.
 * @param __end2 End iterator of second sequence.
 * @param __target Target begin iterator.
 * @param __max_length Maximum number of elements to merge.
 * @param __comp Comparator.
 * @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
         typename _RAIter3, typename _Compare>
  inline _RAIter3
  __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
                           // Different iterator types: parallel
                           // implementation not available.
                           _RAIter2& __begin2, _RAIter2 __end2,
                           _RAIter3 __target, typename
                           std::iterator_traits<_RAIter1>::
                           difference_type __max_length, _Compare __comp)
  { return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
                           __max_length, __comp); }
/** @brief Parallel merge routine being able to merge only the @c
* max_length smallest elements.
/** @brief Parallel merge routine being able to merge only the @__c
* __max_length smallest elements.
*
* The @c begin iterators are advanced accordingly, they might not
* reach @c end, in contrast to the usual variant.
* The @__c __begin iterators are advanced accordingly, they might not
* reach @__c __end, in contrast to the usual variant.
* The functionality is projected onto parallel_multiway_merge.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param target Target begin iterator.
* @param max_length Maximum number of elements to merge.
* @param comp Comparator.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __target Target begin iterator.
* @param __max_length Maximum number of elements to merge.
* @param __comp Comparator.
* @return Output end iterator.
*/
template<typename RandomAccessIterator1, typename RandomAccessIterator3,
typename Comparator>
inline RandomAccessIterator3
parallel_merge_advance(RandomAccessIterator1& begin1,
RandomAccessIterator1 end1,
RandomAccessIterator1& begin2,
RandomAccessIterator1 end2,
RandomAccessIterator3 target, typename
std::iterator_traits<RandomAccessIterator1>::
difference_type max_length, Comparator comp)
template<typename _RAIter1, typename _RAIter3,
typename _Compare>
inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter1& __begin2,
_RAIter1 __end2,
_RAIter3 __target, typename
std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp)
{
typedef typename
std::iterator_traits<RandomAccessIterator1>::value_type value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>::
difference_type difference_type1 /* == difference_type2 */;
typedef typename std::iterator_traits<RandomAccessIterator3>::
difference_type difference_type3;
typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1>
iterator_pair;
std::iterator_traits<_RAIter1>::value_type _ValueType;
typedef typename std::iterator_traits<_RAIter1>::
difference_type _DifferenceType1 /* == difference_type2 */;
typedef typename std::iterator_traits<_RAIter3>::
difference_type _DifferenceType3;
typedef typename std::pair<_RAIter1, _RAIter1>
_IteratorPair;
iterator_pair
seqs[2] = { std::make_pair(begin1, end1),
std::make_pair(begin2, end2) };
RandomAccessIterator3
target_end = parallel_multiway_merge
< /* stable = */ true, /* sentinels = */ false>(
seqs, seqs + 2, target,
_IteratorPair
seqs[2] = { std::make_pair(__begin1, __end1),
std::make_pair(__begin2, __end2) };
_RAIter3
__target_end = parallel_multiway_merge
< /* __stable = */ true, /* __sentinels = */ false>(
seqs, seqs + 2, __target,
multiway_merge_exact_splitting
< /* stable = */ true, iterator_pair*,
Comparator, difference_type1>,
max_length, comp, omp_get_max_threads());
< /* __stable = */ true, _IteratorPair*,
_Compare, _DifferenceType1>,
__max_length, __comp, omp_get_max_threads());
return target_end;
return __target_end;
}
} //namespace __gnu_parallel

View File

@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/multiseq_selection.h
* @brief Functions to find elements of a certain global rank in
* multiple sorted sequences. Also serves for splitting such
* sequence sets.
*
@ -50,275 +50,275 @@
namespace __gnu_parallel
{
/** @brief Compare a pair of types lexicographically, ascending. */
template<typename _T1, typename _T2, typename _Compare>
  class _Lexicographic
  : public std::binary_function<std::pair<_T1, _T2>,
                                std::pair<_T1, _T2>, bool>
  {
  private:
    // Reference to the user-supplied comparator for the first components.
    _Compare& __comp;

  public:
    _Lexicographic(_Compare& _comp) : __comp(_comp) { }

    /** @brief Returns true iff @c __p1 precedes @c __p2 in the
     *  lexicographic order (first component compared by the stored
     *  comparator, ties broken ascending on the second component). */
    bool
    operator()(const std::pair<_T1, _T2>& __p1,
               const std::pair<_T1, _T2>& __p2) const
    {
      if (__comp(__p1.first, __p2.first))
        return true;

      if (__comp(__p2.first, __p1.first))
        return false;

      // Firsts are equal.
      return __p1.second < __p2.second;
    }
  };
/** @brief Compare a pair of types lexicographically, descending. */
template<typename _T1, typename _T2, typename _Compare>
  class _LexicographicReverse : public std::binary_function<_T1, _T2, bool>
  {
  private:
    // Reference to the user-supplied comparator for the first components.
    _Compare& __comp;

  public:
    _LexicographicReverse(_Compare& _comp) : __comp(_comp) { }

    /** @brief Returns true iff @c __p1 follows @c __p2 in the
     *  lexicographic order (first component compared by the stored
     *  comparator, ties broken descending on the second component). */
    bool
    operator()(const std::pair<_T1, _T2>& __p1,
               const std::pair<_T1, _T2>& __p2) const
    {
      if (__comp(__p2.first, __p1.first))
        return true;

      if (__comp(__p1.first, __p2.first))
        return false;

      // Firsts are equal.
      return __p2.second < __p1.second;
    }
  };
/**
* @brief Splits several sorted sequences at a certain global rank,
* @brief Splits several sorted sequences at __a certain global __rank,
* resulting in a splitting point for each sequence.
* The sequences are passed via a sequence of random-access
* The sequences are passed via __a __sequence of random-access
* iterator pairs, none of the sequences may be empty. If there
* are several equal elements across the split, the ones on the
* left side will be chosen from sequences with smaller number.
* @param begin_seqs Begin of the sequence of iterator pairs.
* @param end_seqs End of the sequence of iterator pairs.
* @param rank The global rank to partition at.
* @param begin_offsets A random-access sequence begin where the
* result will be stored in. Each element of the sequence is an
* __left side will be chosen from sequences with smaller number.
* @param __begin_seqs Begin of the sequence of iterator pairs.
* @param __end_seqs End of the sequence of iterator pairs.
* @param __rank The global __rank to partition at.
* @param __begin_offsets A random-access __sequence __begin where the
* __result will be stored in. Each element of the sequence is an
* iterator that points to the first element on the greater part of
* the respective sequence.
* @param comp The ordering functor, defaults to std::less<T>.
* the respective __sequence.
* @param __comp The ordering functor, defaults to std::less<_Tp>.
*/
template<typename RanSeqs, typename RankType, typename RankIterator,
typename Comparator>
template<typename _RanSeqs, typename _RankType, typename _RankIterator,
typename _Compare>
void
multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs,
RankType rank,
RankIterator begin_offsets,
Comparator comp = std::less<
multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs,
_RankType __rank,
_RankIterator __begin_offsets,
_Compare __comp = std::less<
typename std::iterator_traits<typename
std::iterator_traits<RanSeqs>::value_type::
first_type>::value_type>()) // std::less<T>
std::iterator_traits<_RanSeqs>::value_type::
first_type>::value_type>()) // std::less<_Tp>
{
_GLIBCXX_CALL(end_seqs - begin_seqs)
_GLIBCXX_CALL(__end_seqs - __begin_seqs)
typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
It;
typedef typename std::iterator_traits<It>::difference_type
difference_type;
typedef typename std::iterator_traits<It>::value_type value_type;
typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
_It;
typedef typename std::iterator_traits<_It>::difference_type
_DifferenceType;
typedef typename std::iterator_traits<_It>::value_type _ValueType;
lexicographic<value_type, int, Comparator> lcomp(comp);
lexicographic_reverse<value_type, int, Comparator> lrcomp(comp);
_Lexicographic<_ValueType, int, _Compare> __lcomp(__comp);
_LexicographicReverse<_ValueType, int, _Compare> __lrcomp(__comp);
// Number of sequences, number of elements in total (possibly
// including padding).
difference_type m = std::distance(begin_seqs, end_seqs), N = 0,
nmax, n, r;
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0,
__nmax, __n, __r;
for (int i = 0; i < m; i++)
for (int __i = 0; __i < __m; __i++)
{
N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
__N += std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
_GLIBCXX_PARALLEL_ASSERT(
std::distance(begin_seqs[i].first, begin_seqs[i].second) > 0);
std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second) > 0);
}
if (rank == N)
if (__rank == __N)
{
for (int i = 0; i < m; i++)
begin_offsets[i] = begin_seqs[i].second; // Very end.
// Return m - 1;
for (int __i = 0; __i < __m; __i++)
__begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
// Return __m - 1;
return;
}
_GLIBCXX_PARALLEL_ASSERT(m != 0);
_GLIBCXX_PARALLEL_ASSERT(N != 0);
_GLIBCXX_PARALLEL_ASSERT(rank >= 0);
_GLIBCXX_PARALLEL_ASSERT(rank < N);
_GLIBCXX_PARALLEL_ASSERT(__m != 0);
_GLIBCXX_PARALLEL_ASSERT(__N != 0);
_GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
_GLIBCXX_PARALLEL_ASSERT(__rank < __N);
difference_type* ns = new difference_type[m];
difference_type* a = new difference_type[m];
difference_type* b = new difference_type[m];
difference_type l;
_DifferenceType* __ns = new _DifferenceType[__m];
_DifferenceType* __a = new _DifferenceType[__m];
_DifferenceType* __b = new _DifferenceType[__m];
_DifferenceType __l;
ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
nmax = ns[0];
for (int i = 0; i < m; i++)
__ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
__nmax = __ns[0];
for (int __i = 0; __i < __m; __i++)
{
ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
nmax = std::max(nmax, ns[i]);
__ns[__i] = std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
__nmax = std::max(__nmax, __ns[__i]);
}
r = __log2(nmax) + 1;
__r = __log2(__nmax) + 1;
// Pad all lists to this length, at least as long as any ns[i],
// equality iff nmax = 2^k - 1.
l = (1ULL << r) - 1;
// Pad all lists to this length, at least as long as any ns[__i],
// equality iff __nmax = 2^__k - 1.
__l = (1ULL << __r) - 1;
// From now on, including padding.
N = l * m;
__N = __l * __m;
for (int i = 0; i < m; i++)
for (int __i = 0; __i < __m; __i++)
{
a[i] = 0;
b[i] = l;
__a[__i] = 0;
__b[__i] = __l;
}
n = l / 2;
__n = __l / 2;
// Invariants:
// 0 <= a[i] <= ns[i], 0 <= b[i] <= l
// 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
#define S(i) (begin_seqs[i].first)
#define __S(__i) (__begin_seqs[__i].first)
// Initial partition.
std::vector<std::pair<value_type, int> > sample;
std::vector<std::pair<_ValueType, int> > __sample;
for (int i = 0; i < m; i++)
if (n < ns[i]) //sequence long enough
sample.push_back(std::make_pair(S(i)[n], i));
__gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
for (int __i = 0; __i < __m; __i++)
if (__n < __ns[__i]) //__sequence long enough
__sample.push_back(std::make_pair(__S(__i)[__n], __i));
__gnu_sequential::sort(__sample.begin(), __sample.end(), __lcomp);
for (int i = 0; i < m; i++) //conceptual infinity
if (n >= ns[i]) //sequence too short, conceptual infinity
sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
for (int __i = 0; __i < __m; __i++) //conceptual infinity
if (__n >= __ns[__i]) //__sequence too short, conceptual infinity
__sample.push_back(std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
difference_type localrank = rank * m / N ;
_DifferenceType localrank = __rank * __m / __N ;
int j;
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
a[sample[j].second] += n + 1;
for (; j < m; j++)
b[sample[j].second] -= n + 1;
int __j;
for (__j = 0; __j < localrank && ((__n + 1) <= __ns[__sample[__j].second]); ++__j)
__a[__sample[__j].second] += __n + 1;
for (; __j < __m; __j++)
__b[__sample[__j].second] -= __n + 1;
// Further refinement.
while (n > 0)
while (__n > 0)
{
n /= 2;
__n /= 2;
int lmax_seq = -1; // to avoid warning
const value_type* lmax = NULL; // impossible to avoid the warning?
for (int i = 0; i < m; i++)
int __lmax_seq = -1; // to avoid warning
const _ValueType* __lmax = NULL; // impossible to avoid the warning?
for (int __i = 0; __i < __m; __i++)
{
if (a[i] > 0)
if (__a[__i] > 0)
{
if (!lmax)
if (!__lmax)
{
lmax = &(S(i)[a[i] - 1]);
lmax_seq = i;
__lmax = &(__S(__i)[__a[__i] - 1]);
__lmax_seq = __i;
}
else
{
// Max, favor rear sequences.
if (!comp(S(i)[a[i] - 1], *lmax))
if (!__comp(__S(__i)[__a[__i] - 1], *__lmax))
{
lmax = &(S(i)[a[i] - 1]);
lmax_seq = i;
__lmax = &(__S(__i)[__a[__i] - 1]);
__lmax_seq = __i;
}
}
}
}
int i;
for (i = 0; i < m; i++)
int __i;
for (__i = 0; __i < __m; __i++)
{
difference_type middle = (b[i] + a[i]) / 2;
if (lmax && middle < ns[i] &&
lcomp(std::make_pair(S(i)[middle], i),
std::make_pair(*lmax, lmax_seq)))
a[i] = std::min(a[i] + n + 1, ns[i]);
_DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
if (__lmax && __middle < __ns[__i] &&
__lcomp(std::make_pair(__S(__i)[__middle], __i),
std::make_pair(*__lmax, __lmax_seq)))
__a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
else
b[i] -= n + 1;
__b[__i] -= __n + 1;
}
difference_type leftsize = 0, total = 0;
for (int i = 0; i < m; i++)
_DifferenceType __leftsize = 0, __total = 0;
for (int __i = 0; __i < __m; __i++)
{
leftsize += a[i] / (n + 1);
total += l / (n + 1);
__leftsize += __a[__i] / (__n + 1);
__total += __l / (__n + 1);
}
difference_type skew = static_cast<difference_type>
(static_cast<uint64>(total) * rank / N - leftsize);
_DifferenceType __skew = static_cast<_DifferenceType>
(static_cast<uint64>(__total) * __rank / __N - __leftsize);
if (skew > 0)
if (__skew > 0)
{
// Move to the left, find smallest.
std::priority_queue<std::pair<value_type, int>,
std::vector<std::pair<value_type, int> >,
lexicographic_reverse<value_type, int, Comparator> >
pq(lrcomp);
std::priority_queue<std::pair<_ValueType, int>,
std::vector<std::pair<_ValueType, int> >,
_LexicographicReverse<_ValueType, int, _Compare> >
__pq(__lrcomp);
for (int i = 0; i < m; i++)
if (b[i] < ns[i])
pq.push(std::make_pair(S(i)[b[i]], i));
for (int __i = 0; __i < __m; __i++)
if (__b[__i] < __ns[__i])
__pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
for (; skew != 0 && !pq.empty(); --skew)
for (; __skew != 0 && !__pq.empty(); --__skew)
{
int source = pq.top().second;
pq.pop();
int source = __pq.top().second;
__pq.pop();
a[source] = std::min(a[source] + n + 1, ns[source]);
b[source] += n + 1;
__a[source] = std::min(__a[source] + __n + 1, __ns[source]);
__b[source] += __n + 1;
if (b[source] < ns[source])
pq.push(std::make_pair(S(source)[b[source]], source));
if (__b[source] < __ns[source])
__pq.push(std::make_pair(__S(source)[__b[source]], source));
}
}
else if (skew < 0)
else if (__skew < 0)
{
// Move to the right, find greatest.
std::priority_queue<std::pair<value_type, int>,
std::vector<std::pair<value_type, int> >,
lexicographic<value_type, int, Comparator> > pq(lcomp);
std::priority_queue<std::pair<_ValueType, int>,
std::vector<std::pair<_ValueType, int> >,
_Lexicographic<_ValueType, int, _Compare> > __pq(__lcomp);
for (int i = 0; i < m; i++)
if (a[i] > 0)
pq.push(std::make_pair(S(i)[a[i] - 1], i));
for (int __i = 0; __i < __m; __i++)
if (__a[__i] > 0)
__pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
for (; skew != 0; ++skew)
for (; __skew != 0; ++__skew)
{
int source = pq.top().second;
pq.pop();
int source = __pq.top().second;
__pq.pop();
a[source] -= n + 1;
b[source] -= n + 1;
__a[source] -= __n + 1;
__b[source] -= __n + 1;
if (a[source] > 0)
pq.push(std::make_pair(S(source)[a[source] - 1], source));
if (__a[source] > 0)
__pq.push(std::make_pair(__S(source)[__a[source] - 1], source));
}
}
}
// Postconditions:
// a[i] == b[i] in most cases, except when a[i] has been clamped
// __a[__i] == __b[__i] in most cases, except when __a[__i] has been clamped
// because of having reached the boundary
// Now return the result, calculate the offset.
@ -326,236 +326,236 @@ namespace __gnu_parallel
// Compare the keys on both edges of the border.
// Maximum of left edge, minimum of right edge.
value_type* maxleft = NULL;
value_type* minright = NULL;
for (int i = 0; i < m; i++)
_ValueType* __maxleft = NULL;
_ValueType* __minright = NULL;
for (int __i = 0; __i < __m; __i++)
{
if (a[i] > 0)
if (__a[__i] > 0)
{
if (!maxleft)
maxleft = &(S(i)[a[i] - 1]);
if (!__maxleft)
__maxleft = &(__S(__i)[__a[__i] - 1]);
else
{
// Max, favor rear sequences.
if (!comp(S(i)[a[i] - 1], *maxleft))
maxleft = &(S(i)[a[i] - 1]);
if (!__comp(__S(__i)[__a[__i] - 1], *__maxleft))
__maxleft = &(__S(__i)[__a[__i] - 1]);
}
}
if (b[i] < ns[i])
if (__b[__i] < __ns[__i])
{
if (!minright)
minright = &(S(i)[b[i]]);
if (!__minright)
__minright = &(__S(__i)[__b[__i]]);
else
{
// Min, favor fore sequences.
if (comp(S(i)[b[i]], *minright))
minright = &(S(i)[b[i]]);
if (__comp(__S(__i)[__b[__i]], *__minright))
__minright = &(__S(__i)[__b[__i]]);
}
}
}
int seq = 0;
for (int i = 0; i < m; i++)
begin_offsets[i] = S(i) + a[i];
int __seq = 0;
for (int __i = 0; __i < __m; __i++)
__begin_offsets[__i] = __S(__i) + __a[__i];
delete[] ns;
delete[] a;
delete[] b;
delete[] __ns;
delete[] __a;
delete[] __b;
}
/**
* @brief Selects the element at a certain global rank from several
* @brief Selects the element at __a certain global __rank from several
* sorted sequences.
*
* The sequences are passed via a sequence of random-access
* The sequences are passed via __a __sequence of random-access
* iterator pairs, none of the sequences may be empty.
* @param begin_seqs Begin of the sequence of iterator pairs.
* @param end_seqs End of the sequence of iterator pairs.
* @param rank The global rank to partition at.
* @param offset The rank of the selected element in the global
* @param __begin_seqs Begin of the sequence of iterator pairs.
* @param __end_seqs End of the sequence of iterator pairs.
* @param __rank The global __rank to partition at.
* @param __offset The rank of the selected element in the global
* subsequence of elements equal to the selected element. If the
* selected element is unique, this number is 0.
* @param comp The ordering functor, defaults to std::less.
* @param __comp The ordering functor, defaults to std::less.
*/
template<typename T, typename RanSeqs, typename RankType,
typename Comparator>
T
multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
RankType& offset, Comparator comp = std::less<T>())
template<typename _Tp, typename _RanSeqs, typename _RankType,
typename _Compare>
_Tp
multiseq_selection(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, _RankType __rank,
_RankType& __offset, _Compare __comp = std::less<_Tp>())
{
_GLIBCXX_CALL(end_seqs - begin_seqs)
_GLIBCXX_CALL(__end_seqs - __begin_seqs)
typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
It;
typedef typename std::iterator_traits<It>::difference_type
difference_type;
typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
_It;
typedef typename std::iterator_traits<_It>::difference_type
_DifferenceType;
lexicographic<T, int, Comparator> lcomp(comp);
lexicographic_reverse<T, int, Comparator> lrcomp(comp);
_Lexicographic<_Tp, int, _Compare> __lcomp(__comp);
_LexicographicReverse<_Tp, int, _Compare> __lrcomp(__comp);
// Number of sequences, number of elements in total (possibly
// including padding).
difference_type m = std::distance(begin_seqs, end_seqs);
difference_type N = 0;
difference_type nmax, n, r;
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
_DifferenceType __N = 0;
_DifferenceType __nmax, __n, __r;
for (int i = 0; i < m; i++)
N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
for (int __i = 0; __i < __m; __i++)
__N += std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
if (m == 0 || N == 0 || rank < 0 || rank >= N)
if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N)
{
// Result undefined when there is no data or rank is outside bounds.
// _Result undefined when there is no data or __rank is outside bounds.
throw std::exception();
}
difference_type* ns = new difference_type[m];
difference_type* a = new difference_type[m];
difference_type* b = new difference_type[m];
difference_type l;
_DifferenceType* __ns = new _DifferenceType[__m];
_DifferenceType* __a = new _DifferenceType[__m];
_DifferenceType* __b = new _DifferenceType[__m];
_DifferenceType __l;
ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
nmax = ns[0];
for (int i = 0; i < m; ++i)
__ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
__nmax = __ns[0];
for (int __i = 0; __i < __m; ++__i)
{
ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
nmax = std::max(nmax, ns[i]);
__ns[__i] = std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
__nmax = std::max(__nmax, __ns[__i]);
}
r = __log2(nmax) + 1;
__r = __log2(__nmax) + 1;
// Pad all lists to this length, at least as long as any ns[i],
// equality iff nmax = 2^k - 1
l = pow2(r) - 1;
// Pad all lists to this length, at least as long as any ns[__i],
// equality iff __nmax = 2^__k - 1
__l = pow2(__r) - 1;
// From now on, including padding.
N = l * m;
__N = __l * __m;
for (int i = 0; i < m; ++i)
for (int __i = 0; __i < __m; ++__i)
{
a[i] = 0;
b[i] = l;
__a[__i] = 0;
__b[__i] = __l;
}
n = l / 2;
__n = __l / 2;
// Invariants:
// 0 <= a[i] <= ns[i], 0 <= b[i] <= l
// 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
#define S(i) (begin_seqs[i].first)
#define __S(__i) (__begin_seqs[__i].first)
// Initial partition.
std::vector<std::pair<T, int> > sample;
std::vector<std::pair<_Tp, int> > __sample;
for (int i = 0; i < m; i++)
if (n < ns[i])
sample.push_back(std::make_pair(S(i)[n], i));
__gnu_sequential::sort(sample.begin(), sample.end(),
lcomp, sequential_tag());
for (int __i = 0; __i < __m; __i++)
if (__n < __ns[__i])
__sample.push_back(std::make_pair(__S(__i)[__n], __i));
__gnu_sequential::sort(__sample.begin(), __sample.end(),
__lcomp, sequential_tag());
// Conceptual infinity.
for (int i = 0; i < m; i++)
if (n >= ns[i])
sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
for (int __i = 0; __i < __m; __i++)
if (__n >= __ns[__i])
__sample.push_back(std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
difference_type localrank = rank * m / N ;
_DifferenceType localrank = __rank * __m / __N ;
int j;
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
a[sample[j].second] += n + 1;
for (; j < m; ++j)
b[sample[j].second] -= n + 1;
int __j;
for (__j = 0; __j < localrank && ((__n + 1) <= __ns[__sample[__j].second]); ++__j)
__a[__sample[__j].second] += __n + 1;
for (; __j < __m; ++__j)
__b[__sample[__j].second] -= __n + 1;
// Further refinement.
while (n > 0)
while (__n > 0)
{
n /= 2;
__n /= 2;
const T* lmax = NULL;
for (int i = 0; i < m; ++i)
const _Tp* __lmax = NULL;
for (int __i = 0; __i < __m; ++__i)
{
if (a[i] > 0)
if (__a[__i] > 0)
{
if (!lmax)
lmax = &(S(i)[a[i] - 1]);
if (!__lmax)
__lmax = &(__S(__i)[__a[__i] - 1]);
else
{
if (comp(*lmax, S(i)[a[i] - 1])) //max
lmax = &(S(i)[a[i] - 1]);
if (__comp(*__lmax, __S(__i)[__a[__i] - 1])) //max
__lmax = &(__S(__i)[__a[__i] - 1]);
}
}
}
int i;
for (i = 0; i < m; i++)
int __i;
for (__i = 0; __i < __m; __i++)
{
difference_type middle = (b[i] + a[i]) / 2;
if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
a[i] = std::min(a[i] + n + 1, ns[i]);
_DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
if (__lmax && __middle < __ns[__i] && __comp(__S(__i)[__middle], *__lmax))
__a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
else
b[i] -= n + 1;
__b[__i] -= __n + 1;
}
difference_type leftsize = 0, total = 0;
for (int i = 0; i < m; ++i)
_DifferenceType __leftsize = 0, __total = 0;
for (int __i = 0; __i < __m; ++__i)
{
leftsize += a[i] / (n + 1);
total += l / (n + 1);
__leftsize += __a[__i] / (__n + 1);
__total += __l / (__n + 1);
}
difference_type skew = ((unsigned long long)total * rank / N
- leftsize);
_DifferenceType __skew = ((unsigned long long)__total * __rank / __N
- __leftsize);
if (skew > 0)
if (__skew > 0)
{
// Move to the left, find smallest.
std::priority_queue<std::pair<T, int>,
std::vector<std::pair<T, int> >,
lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
std::priority_queue<std::pair<_Tp, int>,
std::vector<std::pair<_Tp, int> >,
_LexicographicReverse<_Tp, int, _Compare> > __pq(__lrcomp);
for (int i = 0; i < m; ++i)
if (b[i] < ns[i])
pq.push(std::make_pair(S(i)[b[i]], i));
for (int __i = 0; __i < __m; ++__i)
if (__b[__i] < __ns[__i])
__pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
for (; skew != 0 && !pq.empty(); --skew)
for (; __skew != 0 && !__pq.empty(); --__skew)
{
int source = pq.top().second;
pq.pop();
int source = __pq.top().second;
__pq.pop();
a[source] = std::min(a[source] + n + 1, ns[source]);
b[source] += n + 1;
__a[source] = std::min(__a[source] + __n + 1, __ns[source]);
__b[source] += __n + 1;
if (b[source] < ns[source])
pq.push(std::make_pair(S(source)[b[source]], source));
if (__b[source] < __ns[source])
__pq.push(std::make_pair(__S(source)[__b[source]], source));
}
}
else if (skew < 0)
else if (__skew < 0)
{
// Move to the right, find greatest.
std::priority_queue<std::pair<T, int>,
std::vector<std::pair<T, int> >,
lexicographic<T, int, Comparator> > pq(lcomp);
std::priority_queue<std::pair<_Tp, int>,
std::vector<std::pair<_Tp, int> >,
_Lexicographic<_Tp, int, _Compare> > __pq(__lcomp);
for (int i = 0; i < m; ++i)
if (a[i] > 0)
pq.push(std::make_pair(S(i)[a[i] - 1], i));
for (int __i = 0; __i < __m; ++__i)
if (__a[__i] > 0)
__pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
for (; skew != 0; ++skew)
for (; __skew != 0; ++__skew)
{
int source = pq.top().second;
pq.pop();
int source = __pq.top().second;
__pq.pop();
a[source] -= n + 1;
b[source] -= n + 1;
__a[source] -= __n + 1;
__b[source] -= __n + 1;
if (a[source] > 0)
pq.push(std::make_pair(S(source)[a[source] - 1], source));
if (__a[source] > 0)
__pq.push(std::make_pair(__S(source)[__a[source] - 1], source));
}
}
}
// Postconditions:
// a[i] == b[i] in most cases, except when a[i] has been clamped
// __a[__i] == __b[__i] in most cases, except when __a[__i] has been clamped
// because of having reached the boundary
// Now return the result, calculate the offset.
@ -563,71 +563,71 @@ namespace __gnu_parallel
// Compare the keys on both edges of the border.
// Maximum of left edge, minimum of right edge.
bool maxleftset = false, minrightset = false;
bool __maxleftset = false, __minrightset = false;
// Impossible to avoid the warning?
T maxleft, minright;
for (int i = 0; i < m; ++i)
_Tp __maxleft, __minright;
for (int __i = 0; __i < __m; ++__i)
{
if (a[i] > 0)
if (__a[__i] > 0)
{
if (!maxleftset)
if (!__maxleftset)
{
maxleft = S(i)[a[i] - 1];
maxleftset = true;
__maxleft = __S(__i)[__a[__i] - 1];
__maxleftset = true;
}
else
{
// Max.
if (comp(maxleft, S(i)[a[i] - 1]))
maxleft = S(i)[a[i] - 1];
if (__comp(__maxleft, __S(__i)[__a[__i] - 1]))
__maxleft = __S(__i)[__a[__i] - 1];
}
}
if (b[i] < ns[i])
if (__b[__i] < __ns[__i])
{
if (!minrightset)
if (!__minrightset)
{
minright = S(i)[b[i]];
minrightset = true;
__minright = __S(__i)[__b[__i]];
__minrightset = true;
}
else
{
// Min.
if (comp(S(i)[b[i]], minright))
minright = S(i)[b[i]];
if (__comp(__S(__i)[__b[__i]], __minright))
__minright = __S(__i)[__b[__i]];
}
}
}
// Minright is the splitter, in any case.
if (!maxleftset || comp(minright, maxleft))
if (!__maxleftset || __comp(__minright, __maxleft))
{
// Good luck, everything is split unambiguously.
offset = 0;
__offset = 0;
}
else
{
// We have to calculate an offset.
offset = 0;
__offset = 0;
for (int i = 0; i < m; ++i)
for (int __i = 0; __i < __m; ++__i)
{
difference_type lb = std::lower_bound(S(i), S(i) + ns[i],
minright,
comp) - S(i);
offset += a[i] - lb;
_DifferenceType lb = std::lower_bound(__S(__i), __S(__i) + __ns[__i],
__minright,
__comp) - __S(__i);
__offset += __a[__i] - lb;
}
}
delete[] ns;
delete[] a;
delete[] b;
delete[] __ns;
delete[] __a;
delete[] __b;
return minright;
return __minright;
}
}
#undef S
#undef __S
#endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */

File diff suppressed because it is too large Load Diff

View File

@ -44,431 +44,431 @@ namespace __gnu_parallel
/** @brief Subsequence description. */
template<typename _DifferenceTp>
struct Piece
struct _Piece
{
typedef _DifferenceTp difference_type;
typedef _DifferenceTp _DifferenceType;
/** @brief Begin of subsequence. */
difference_type begin;
_DifferenceType __begin;
/** @brief End of subsequence. */
difference_type end;
_DifferenceType __end;
};
/** @brief Data accessed by all threads.
*
* PMWMS = parallel multiway mergesort */
template<typename RandomAccessIterator>
struct PMWMSSortingData
template<typename _RAIter>
struct _PMWMSSortingData
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Number of threads involved. */
thread_index_t num_threads;
_ThreadIndex __num_threads;
/** @brief Input begin. */
RandomAccessIterator source;
/** @brief Input __begin. */
_RAIter _M_source;
/** @brief Start indices, per thread. */
difference_type* starts;
_DifferenceType* _M_starts;
/** @brief Storage in which to sort. */
value_type** temporary;
_ValueType** _M_temporary;
/** @brief Samples. */
value_type* samples;
_ValueType* _M_samples;
/** @brief Offsets to add to the found positions. */
difference_type* offsets;
_DifferenceType* _M_offsets;
/** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces;
/** @brief Pieces of data to merge @__c [thread][__sequence] */
std::vector<_Piece<_DifferenceType> >* _M_pieces;
};
/**
* @brief Select samples from a sequence.
* @param sd Pointer to algorithm data. Result will be placed in
* @c sd->samples.
* @param num_samples Number of samples to select.
* @brief Select _M_samples from a sequence.
* @param __sd Pointer to algorithm data. _Result will be placed in
* @__c __sd->_M_samples.
* @param __num_samples Number of _M_samples to select.
*/
template<typename RandomAccessIterator, typename _DifferenceTp>
template<typename _RAIter, typename _DifferenceTp>
void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp num_samples)
__determine_samples(_PMWMSSortingData<_RAIter>* __sd,
_DifferenceTp __num_samples)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef _DifferenceTp _DifferenceType;
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
difference_type* es = new difference_type[num_samples + 2];
_DifferenceType* __es = new _DifferenceType[__num_samples + 2];
equally_split(sd->starts[iam + 1] - sd->starts[iam],
num_samples + 1, es);
equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
__num_samples + 1, __es);
for (difference_type i = 0; i < num_samples; ++i)
::new(&(sd->samples[iam * num_samples + i]))
value_type(sd->source[sd->starts[iam] + es[i + 1]]);
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
delete[] es;
delete[] __es;
}
/** @brief Split consistently. */
template<bool exact, typename RandomAccessIterator,
typename Comparator, typename SortingPlacesIterator>
struct split_consistently
template<bool __exact, typename _RAIter,
typename _Compare, typename _SortingPlacesIterator>
struct _SplitConsistently
{
};
/** @brief Split by exact splitting. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently
<true, RandomAccessIterator, Comparator, SortingPlacesIterator>
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently
<true, _RAIter, _Compare, _SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
std::iterator_traits<_RAIter>::difference_type
__num_samples)
const
{
# pragma omp barrier
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (thread_index_t s = 0; s < sd->num_threads; s++)
seqs[s] = std::make_pair(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
seqs(__sd->__num_threads);
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
std::vector<_SortingPlacesIterator> _M_offsets(__sd->__num_threads);
// if not last thread
if (iam < sd->num_threads - 1)
if (__iam < __sd->__num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
__sd->_M_starts[__iam + 1], _M_offsets.begin(), __comp);
for (int seq = 0; seq < sd->num_threads; seq++)
for (int __seq = 0; __seq < __sd->__num_threads; __seq++)
{
// for each sequence
if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
if (__iam < (__sd->__num_threads - 1))
__sd->_M_pieces[__iam][__seq].__end = _M_offsets[__seq] - seqs[__seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end =
sd->starts[seq + 1] - sd->starts[seq];
__sd->_M_pieces[__iam][__seq].__end =
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
}
# pragma omp barrier
for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
for (_ThreadIndex __seq = 0; __seq < __sd->__num_threads; __seq++)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
if (__iam > 0)
__sd->_M_pieces[__iam][__seq].__begin = __sd->_M_pieces[__iam - 1][__seq].__end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
__sd->_M_pieces[__iam][__seq].__begin = 0;
}
}
};
/** @brief Split by sampling. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently<false, RandomAccessIterator, Comparator,
SortingPlacesIterator>
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare,
_SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
std::iterator_traits<_RAIter>::difference_type
__num_samples)
const
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
determine_samples(sd, num_samples);
__determine_samples(__sd, __num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * sd->num_threads),
comp);
__gnu_sequential::sort(__sd->_M_samples,
__sd->_M_samples + (__num_samples * __sd->__num_threads),
__comp);
# pragma omp barrier
for (thread_index_t s = 0; s < sd->num_threads; ++s)
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; ++__s)
{
// For each sequence.
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->temporary[s];
if (__num_samples * __iam > 0)
__sd->_M_pieces[__iam][__s].__begin =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * __iam],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute beginning.
sd->pieces[iam][s].begin = 0;
__sd->_M_pieces[__iam][__s].__begin = 0;
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->temporary[s];
if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->__num_threads))
__sd->_M_pieces[__iam][__s].__end =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * (__iam + 1)],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute end.
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
// Absolute __end.
__sd->_M_pieces[__iam][__s].__end = __sd->_M_starts[__s + 1] - __sd->_M_starts[__s];
}
}
};
template<bool stable, typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort
template<bool __stable, typename _RAIter, typename _Compare>
struct __possibly_stable_sort
{
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<true, _RAIter, _Compare>
{
void operator()(const RandomAccessIterator& begin,
const RandomAccessIterator& end, Comparator& comp) const
void operator()(const _RAIter& __begin,
const _RAIter& __end, _Compare& __comp) const
{
__gnu_sequential::stable_sort(begin, end, comp);
__gnu_sequential::stable_sort(__begin, __end, __comp);
}
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<false, _RAIter, _Compare>
{
void operator()(const RandomAccessIterator begin,
const RandomAccessIterator end, Comparator& comp) const
void operator()(const _RAIter __begin,
const _RAIter __end, _Compare& __comp) const
{
__gnu_sequential::sort(begin, end, comp);
__gnu_sequential::sort(__begin, __end, __comp);
}
};
template<bool stable, typename SeqRandomAccessIterator,
typename RandomAccessIterator, typename Comparator,
template<bool __stable, typename Seq_RAIter,
typename _RAIter, typename _Compare,
typename DiffType>
struct possibly_stable_multiway_merge
struct __possibly_stable_multiway_merge
{
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename DiffType>
struct __possibly_stable_multiway_merge
<true, Seq_RAIter, _RAIter, _Compare,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
void operator()(const Seq_RAIter& __seqs_begin,
const Seq_RAIter& __seqs_end,
const _RAIter& __target,
_Compare& __comp,
DiffType __length_am) const
{
stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
sequential_tag());
}
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename DiffType>
struct __possibly_stable_multiway_merge
<false, Seq_RAIter, _RAIter, _Compare,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
void operator()(const Seq_RAIter& __seqs_begin,
const Seq_RAIter& __seqs_end,
const _RAIter& __target,
_Compare& __comp,
DiffType __length_am) const
{
multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
sequential_tag());
}
};
/** @brief PMWMS code executed by each thread.
* @param sd Pointer to algorithm data.
* @param comp Comparator.
* @param __sd Pointer to algorithm data.
* @param __comp Comparator.
*/
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
template<bool __stable, bool __exact, typename _RAIter,
typename _Compare>
void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp)
parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
_DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel.
typedef value_type* SortingPlacesIterator;
typedef _ValueType* _SortingPlacesIterator;
sd->temporary[iam] =
static_cast<value_type*>(
::operator new(sizeof(value_type) * (length_local + 1)));
__sd->_M_temporary[__iam] =
static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * (__length_local + 1)));
// Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam],
sd->source + sd->starts[iam] + length_local,
sd->temporary[iam]);
std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_source + __sd->_M_starts[__iam] + __length_local,
__sd->_M_temporary[__iam]);
possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
(sd->temporary[iam], sd->temporary[iam] + length_local, comp);
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
(__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp);
// Invariant: locally sorted subsequence in sd->temporary[iam],
// sd->temporary[iam] + length_local.
// Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
// __sd->_M_temporary[__iam] + __length_local.
// No barrier here: Synchronization is done by the splitting routine.
difference_type num_samples =
_Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
split_consistently
<exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
(iam, sd, comp, num_samples);
_DifferenceType __num_samples =
_Settings::get().sort_mwms_oversampling * __sd->__num_threads - 1;
_SplitConsistently
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
(__iam, __sd, __comp, __num_samples);
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (thread_index_t s = 0; s < sd->num_threads; s++)
// Offset from __target __begin, __length after merging.
_DifferenceType __offset = 0, __length_am = 0;
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; __s++)
{
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin;
__length_am += __sd->_M_pieces[__iam][__s].__end - __sd->_M_pieces[__iam][__s].__begin;
__offset += __sd->_M_pieces[__iam][__s].__begin;
}
typedef std::vector<
std::pair<SortingPlacesIterator, SortingPlacesIterator> >
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
seq_vector_type;
seq_vector_type seqs(sd->num_threads);
seq_vector_type seqs(__sd->__num_threads);
for (int s = 0; s < sd->num_threads; ++s)
for (int __s = 0; __s < __sd->__num_threads; ++__s)
{
seqs[s] =
std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
sd->temporary[s] + sd->pieces[iam][s].end);
seqs[__s] =
std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s].__begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s].__end);
}
possibly_stable_multiway_merge<
stable,
__possibly_stable_multiway_merge<
__stable,
typename seq_vector_type::iterator,
RandomAccessIterator,
Comparator, difference_type>()
_RAIter,
_Compare, _DifferenceType>()
(seqs.begin(), seqs.end(),
sd->source + offset, comp,
length_am);
__sd->_M_source + __offset, __comp,
__length_am);
# pragma omp barrier
::operator delete(sd->temporary[iam]);
::operator delete(__sd->_M_temporary[__iam]);
}
/** @brief PMWMS main call.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @param n Length of sequence.
* @param num_threads Number of threads to use.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
*/
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
template<bool __stable, bool __exact, typename _RAIter,
typename _Compare>
void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
parallel_sort_mwms(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
if (n <= 1)
if (__n <= 1)
return;
// at least one element per thread
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// shared variables
PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts;
_PMWMSSortingData<_RAIter> __sd;
_DifferenceType* _M_starts;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
num_threads = omp_get_num_threads(); //no more threads than requested
__num_threads = omp_get_num_threads(); //no more threads than requested
# pragma omp single
{
sd.num_threads = num_threads;
sd.source = begin;
__sd.__num_threads = __num_threads;
__sd._M_source = __begin;
sd.temporary = new value_type*[num_threads];
__sd._M_temporary = new _ValueType*[__num_threads];
if (!exact)
if (!__exact)
{
difference_type size =
(_Settings::get().sort_mwms_oversampling * num_threads - 1)
* num_threads;
sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type)));
_DifferenceType size =
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
* __num_threads;
__sd._M_samples = static_cast<_ValueType*>(
::operator new(size * sizeof(_ValueType)));
}
else
sd.samples = NULL;
__sd._M_samples = NULL;
sd.offsets = new difference_type[num_threads - 1];
sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1];
__sd._M_offsets = new _DifferenceType[__num_threads - 1];
__sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads;
difference_type pos = 0;
for (int i = 0; i < num_threads; ++i)
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads;
_DifferenceType __pos = 0;
for (int __i = 0; __i < __num_threads; ++__i)
{
starts[i] = pos;
pos += (i < split) ? (chunk_length + 1) : chunk_length;
_M_starts[__i] = __pos;
__pos += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
}
starts[num_threads] = pos;
_M_starts[__num_threads] = __pos;
} //single
// Now sort in parallel.
parallel_sort_mwms_pu<stable, exact>(&sd, comp);
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
} //parallel
delete[] starts;
delete[] sd.temporary;
delete[] _M_starts;
delete[] __sd._M_temporary;
if (!exact)
::operator delete(sd.samples);
if (!__exact)
::operator delete(__sd._M_samples);
delete[] sd.offsets;
delete[] sd.pieces;
delete[] __sd._M_offsets;
delete[] __sd._M_pieces;
}
} //namespace __gnu_parallel

View File

@ -51,448 +51,448 @@ namespace std
namespace __parallel
{
// Sequential fallback.
template<typename InputIterator, typename T>
inline T
accumulate(InputIterator begin, InputIterator end, T init,
template<typename _IIter, typename _Tp>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::accumulate(begin, end, init); }
{ return _GLIBCXX_STD_P::accumulate(__begin, __end, __init); }
template<typename InputIterator, typename T, typename BinaryOperation>
inline T
accumulate(InputIterator begin, InputIterator end, T init,
BinaryOperation binary_op, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::accumulate(begin, end, init, binary_op); }
template<typename _IIter, typename _Tp, typename _BinaryOperation>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init,
_BinaryOperation __binary_op, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::accumulate(__begin, __end, __init, __binary_op); }
// Sequential fallback for input iterator case.
template<typename InputIterator, typename T, typename IteratorTag>
inline T
accumulate_switch(InputIterator begin, InputIterator end,
T init, IteratorTag)
{ return accumulate(begin, end, init, __gnu_parallel::sequential_tag()); }
template<typename _IIter, typename _Tp, typename _IteratorTag>
inline _Tp
__accumulate_switch(_IIter __begin, _IIter __end,
_Tp __init, _IteratorTag)
{ return accumulate(__begin, __end, __init, __gnu_parallel::sequential_tag()); }
template<typename InputIterator, typename T, typename BinaryOperation,
typename IteratorTag>
inline T
accumulate_switch(InputIterator begin, InputIterator end, T init,
BinaryOperation binary_op, IteratorTag)
{ return accumulate(begin, end, init, binary_op,
template<typename _IIter, typename _Tp, typename _BinaryOperation,
typename _IteratorTag>
inline _Tp
__accumulate_switch(_IIter __begin, _IIter __end, _Tp __init,
_BinaryOperation __binary_op, _IteratorTag)
{ return accumulate(__begin, __end, __init, __binary_op,
__gnu_parallel::sequential_tag()); }
// Parallel algorithm for random access iterators.
template<typename _RandomAccessIterator, typename T,
typename BinaryOperation>
T
accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end,
T init, BinaryOperation binary_op,
template<typename __RAIter, typename _Tp,
typename _BinaryOperation>
_Tp
__accumulate_switch(__RAIter __begin, __RAIter __end,
_Tp __init, _BinaryOperation __binary_op,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism_tag
__gnu_parallel::_Parallelism __parallelism_tag
= __gnu_parallel::parallel_unbalanced)
{
if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
>= __gnu_parallel::_Settings::get().accumulate_minimal_n
&& __gnu_parallel::is_parallel(parallelism_tag)))
&& __gnu_parallel::__is_parallel(__parallelism_tag)))
{
T res = init;
__gnu_parallel::accumulate_selector<_RandomAccessIterator>
my_selector;
_Tp __res = __init;
__gnu_parallel::__accumulate_selector<__RAIter>
__my_selector;
__gnu_parallel::
for_each_template_random_access_ed(begin, end,
__gnu_parallel::nothing(),
my_selector,
for_each_template_random_access_ed(__begin, __end,
__gnu_parallel::_Nothing(),
__my_selector,
__gnu_parallel::
accumulate_binop_reduct
<BinaryOperation>(binary_op),
res, res, -1);
return res;
__accumulate_binop_reduct
<_BinaryOperation>(__binary_op),
__res, __res, -1);
return __res;
}
else
return accumulate(begin, end, init, binary_op,
return accumulate(__begin, __end, __init, __binary_op,
__gnu_parallel::sequential_tag());
}
// Public interface.
template<typename InputIterator, typename T>
inline T
accumulate(InputIterator begin, InputIterator end, T init,
__gnu_parallel::_Parallelism parallelism_tag)
template<typename _IIter, typename _Tp>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef std::iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::value_type value_type;
typedef typename iterator_traits::iterator_category iterator_category;
typedef std::iterator_traits<_IIter> _IteratorTraits;
typedef typename _IteratorTraits::value_type _ValueType;
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
return accumulate_switch(begin, end, init,
__gnu_parallel::plus<T, value_type>(),
iterator_category(), parallelism_tag);
return __accumulate_switch(__begin, __end, __init,
__gnu_parallel::_Plus<_Tp, _ValueType>(),
_IteratorCategory(), __parallelism_tag);
}
template<typename InputIterator, typename T>
inline T
accumulate(InputIterator begin, InputIterator end, T init)
template<typename _IIter, typename _Tp>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init)
{
typedef std::iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::value_type value_type;
typedef typename iterator_traits::iterator_category iterator_category;
typedef std::iterator_traits<_IIter> _IteratorTraits;
typedef typename _IteratorTraits::value_type _ValueType;
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
return accumulate_switch(begin, end, init,
__gnu_parallel::plus<T, value_type>(),
iterator_category());
return __accumulate_switch(__begin, __end, __init,
__gnu_parallel::_Plus<_Tp, _ValueType>(),
_IteratorCategory());
}
template<typename InputIterator, typename T, typename BinaryOperation>
inline T
accumulate(InputIterator begin, InputIterator end, T init,
BinaryOperation binary_op,
__gnu_parallel::_Parallelism parallelism_tag)
template<typename _IIter, typename _Tp, typename _BinaryOperation>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init,
_BinaryOperation __binary_op,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::iterator_category iterator_category;
return accumulate_switch(begin, end, init, binary_op,
iterator_category(), parallelism_tag);
typedef iterator_traits<_IIter> _IteratorTraits;
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
return __accumulate_switch(__begin, __end, __init, __binary_op,
_IteratorCategory(), __parallelism_tag);
}
template<typename InputIterator, typename T, typename BinaryOperation>
inline T
accumulate(InputIterator begin, InputIterator end, T init,
BinaryOperation binary_op)
template<typename _IIter, typename _Tp, typename _BinaryOperation>
inline _Tp
accumulate(_IIter __begin, _IIter __end, _Tp __init,
_BinaryOperation __binary_op)
{
typedef iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::iterator_category iterator_category;
return accumulate_switch(begin, end, init, binary_op,
iterator_category());
typedef iterator_traits<_IIter> _IteratorTraits;
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
return __accumulate_switch(__begin, __end, __init, __binary_op,
_IteratorCategory());
}
// Sequential fallback.
template<typename InputIterator1, typename InputIterator2, typename T>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init,
template<typename _IIter1, typename _IIter2, typename _Tp>
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init); }
{ return _GLIBCXX_STD_P::inner_product(__first1, __last1, __first2, __init); }
template<typename InputIterator1, typename InputIterator2, typename T,
template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init,
binary_op1, binary_op2); }
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
BinaryFunction2 __binary_op2, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::inner_product(__first1, __last1, __first2, __init,
__binary_op1, __binary_op2); }
// Parallel algorithm for random access iterators.
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
typename T, typename BinaryFunction1, typename BinaryFunction2>
T
inner_product_switch(RandomAccessIterator1 first1,
RandomAccessIterator1 last1,
RandomAccessIterator2 first2, T init,
BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2,
template<typename _RAIter1, typename _RAIter2,
typename _Tp, typename BinaryFunction1, typename BinaryFunction2>
_Tp
__inner_product_switch(_RAIter1 __first1,
_RAIter1 __last1,
_RAIter2 __first2, _Tp __init,
BinaryFunction1 __binary_op1,
BinaryFunction2 __binary_op2,
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism_tag
__gnu_parallel::_Parallelism __parallelism_tag
= __gnu_parallel::parallel_unbalanced)
{
if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1)
if (_GLIBCXX_PARALLEL_CONDITION((__last1 - __first1)
>= __gnu_parallel::_Settings::get().
accumulate_minimal_n
&& __gnu_parallel::
is_parallel(parallelism_tag)))
__is_parallel(__parallelism_tag)))
{
T res = init;
_Tp __res = __init;
__gnu_parallel::
inner_product_selector<RandomAccessIterator1,
RandomAccessIterator2, T> my_selector(first1, first2);
__inner_product_selector<_RAIter1,
_RAIter2, _Tp> __my_selector(__first1, __first2);
__gnu_parallel::
for_each_template_random_access_ed(first1, last1, binary_op2,
my_selector, binary_op1,
res, res, -1);
return res;
for_each_template_random_access_ed(__first1, __last1, __binary_op2,
__my_selector, __binary_op1,
__res, __res, -1);
return __res;
}
else
return inner_product(first1, last1, first2, init,
return inner_product(__first1, __last1, __first2, __init,
__gnu_parallel::sequential_tag());
}
// No parallelism for input iterators.
template<typename InputIterator1, typename InputIterator2, typename T,
template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2,
typename IteratorTag1, typename IteratorTag2>
inline T
inner_product_switch(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init,
BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2,
IteratorTag1, IteratorTag2)
{ return inner_product(first1, last1, first2, init,
binary_op1, binary_op2,
typename _IteratorTag1, typename _IteratorTag2>
inline _Tp
__inner_product_switch(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init,
BinaryFunction1 __binary_op1,
BinaryFunction2 __binary_op2,
_IteratorTag1, _IteratorTag2)
{ return inner_product(__first1, __last1, __first2, __init,
__binary_op1, __binary_op2,
__gnu_parallel::sequential_tag()); }
template<typename InputIterator1, typename InputIterator2, typename T,
template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2,
__gnu_parallel::_Parallelism parallelism_tag)
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
BinaryFunction2 __binary_op2,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef iterator_traits<InputIterator1> traits1_type;
typedef typename traits1_type::iterator_category iterator1_category;
typedef iterator_traits<_IIter1> _TraitsType1;
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
typedef iterator_traits<InputIterator2> traits2_type;
typedef typename traits2_type::iterator_category iterator2_category;
typedef iterator_traits<_IIter2> _TraitsType2;
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
return inner_product_switch(first1, last1, first2, init, binary_op1,
binary_op2, iterator1_category(),
iterator2_category(), parallelism_tag);
return __inner_product_switch(__first1, __last1, __first2, __init, __binary_op1,
__binary_op2, _IteratorCategory1(),
_IteratorCategory2(), __parallelism_tag);
}
template<typename InputIterator1, typename InputIterator2, typename T,
template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2)
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
BinaryFunction2 __binary_op2)
{
typedef iterator_traits<InputIterator1> traits1_type;
typedef typename traits1_type::iterator_category iterator1_category;
typedef iterator_traits<_IIter1> _TraitsType1;
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
typedef iterator_traits<InputIterator2> traits2_type;
typedef typename traits2_type::iterator_category iterator2_category;
typedef iterator_traits<_IIter2> _TraitsType2;
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
return inner_product_switch(first1, last1, first2, init, binary_op1,
binary_op2, iterator1_category(),
iterator2_category());
return __inner_product_switch(__first1, __last1, __first2, __init, __binary_op1,
__binary_op2, _IteratorCategory1(),
_IteratorCategory2());
}
template<typename InputIterator1, typename InputIterator2, typename T>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init,
__gnu_parallel::_Parallelism parallelism_tag)
template<typename _IIter1, typename _IIter2, typename _Tp>
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef iterator_traits<InputIterator1> traits_type1;
typedef iterator_traits<_IIter1> traits_type1;
typedef typename traits_type1::value_type value_type1;
typedef iterator_traits<InputIterator2> traits_type2;
typedef iterator_traits<_IIter2> traits_type2;
typedef typename traits_type2::value_type value_type2;
typedef typename
__gnu_parallel::multiplies<value_type1, value_type2>::result
multiplies_result_type;
return inner_product(first1, last1, first2, init,
__gnu_parallel::plus<T, multiplies_result_type>(),
__gnu_parallel::_Multiplies<value_type1, value_type2>::__result
_MultipliesResultType;
return inner_product(__first1, __last1, __first2, __init,
__gnu_parallel::_Plus<_Tp, _MultipliesResultType>(),
__gnu_parallel::
multiplies<value_type1, value_type2>(),
parallelism_tag);
_Multiplies<value_type1, value_type2>(),
__parallelism_tag);
}
template<typename InputIterator1, typename InputIterator2, typename T>
inline T
inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init)
template<typename _IIter1, typename _IIter2, typename _Tp>
inline _Tp
inner_product(_IIter1 __first1, _IIter1 __last1,
_IIter2 __first2, _Tp __init)
{
typedef iterator_traits<InputIterator1> traits_type1;
typedef iterator_traits<_IIter1> traits_type1;
typedef typename traits_type1::value_type value_type1;
typedef iterator_traits<InputIterator2> traits_type2;
typedef iterator_traits<_IIter2> traits_type2;
typedef typename traits_type2::value_type value_type2;
typedef typename
__gnu_parallel::multiplies<value_type1, value_type2>::result
multiplies_result_type;
return inner_product(first1, last1, first2, init,
__gnu_parallel::plus<T, multiplies_result_type>(),
__gnu_parallel::_Multiplies<value_type1, value_type2>::__result
_MultipliesResultType;
return inner_product(__first1, __last1, __first2, __init,
__gnu_parallel::_Plus<_Tp, _MultipliesResultType>(),
__gnu_parallel::
multiplies<value_type1, value_type2>());
_Multiplies<value_type1, value_type2>());
}
// Sequential fallback.
template<typename InputIterator, typename OutputIterator>
inline OutputIterator
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
template<typename _IIter, typename _OutputIterator>
inline _OutputIterator
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result); }
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result); }
// Sequential fallback.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
inline OutputIterator
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
BinaryOperation bin_op, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); }
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
inline _OutputIterator
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __bin_op, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result, __bin_op); }
// Sequential fallback for input iterator case.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation, typename IteratorTag1,
typename IteratorTag2>
inline OutputIterator
partial_sum_switch(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
IteratorTag1, IteratorTag2)
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); }
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation, typename _IteratorTag1,
typename _IteratorTag2>
inline _OutputIterator
__partial_sum_switch(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
_IteratorTag1, _IteratorTag2)
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result, __bin_op); }
// Parallel algorithm for random access iterators.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
OutputIterator
partial_sum_switch(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__partial_sum_switch(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
random_access_iterator_tag, random_access_iterator_tag)
{
if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
>= __gnu_parallel::_Settings::get().partial_sum_minimal_n))
return __gnu_parallel::parallel_partial_sum(begin, end,
result, bin_op);
return __gnu_parallel::__parallel_partial_sum(__begin, __end,
__result, __bin_op);
else
return partial_sum(begin, end, result, bin_op,
return partial_sum(__begin, __end, __result, __bin_op,
__gnu_parallel::sequential_tag());
}
// Public interface.
template<typename InputIterator, typename OutputIterator>
inline OutputIterator
partial_sum(InputIterator begin, InputIterator end, OutputIterator result)
template<typename _IIter, typename _OutputIterator>
inline _OutputIterator
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result)
{
typedef typename iterator_traits<InputIterator>::value_type value_type;
return partial_sum(begin, end, result, std::plus<value_type>());
typedef typename iterator_traits<_IIter>::value_type _ValueType;
return partial_sum(__begin, __end, __result, std::plus<_ValueType>());
}
// Public interface
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
inline OutputIterator
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
BinaryOperation binary_op)
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
inline _OutputIterator
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __binary_op)
{
typedef iterator_traits<InputIterator> traitsi_type;
typedef typename traitsi_type::iterator_category iteratori_category;
typedef iterator_traits<_IIter> traitsi_type;
typedef typename traitsi_type::iterator_category _IIteratorCategory;
typedef iterator_traits<OutputIterator> traitso_type;
typedef typename traitso_type::iterator_category iteratoro_category;
typedef iterator_traits<_OutputIterator> _OTraitsType;
typedef typename _OTraitsType::iterator_category _OIterCategory;
return partial_sum_switch(begin, end, result, binary_op,
iteratori_category(), iteratoro_category());
return __partial_sum_switch(__begin, __end, __result, __binary_op,
_IIteratorCategory(), _OIterCategory());
}
// Sequential fallback.
template<typename InputIterator, typename OutputIterator>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::adjacent_difference(begin, end, result); }
template<typename _IIter, typename _OutputIterator>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result, __gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::adjacent_difference(__begin, __end, __result); }
// Sequential fallback.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
__gnu_parallel::sequential_tag)
{ return _GLIBCXX_STD_P::adjacent_difference(begin, end, result, bin_op); }
{ return _GLIBCXX_STD_P::adjacent_difference(__begin, __end, __result, __bin_op); }
// Sequential fallback for input iterator case.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation, typename IteratorTag1,
typename IteratorTag2>
inline OutputIterator
adjacent_difference_switch(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
IteratorTag1, IteratorTag2)
{ return adjacent_difference(begin, end, result, bin_op,
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation, typename _IteratorTag1,
typename _IteratorTag2>
inline _OutputIterator
__adjacent_difference_switch(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
_IteratorTag1, _IteratorTag2)
{ return adjacent_difference(__begin, __end, __result, __bin_op,
__gnu_parallel::sequential_tag()); }
// Parallel algorithm for random access iterators.
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
OutputIterator
adjacent_difference_switch(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__adjacent_difference_switch(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism_tag
__gnu_parallel::_Parallelism __parallelism_tag
= __gnu_parallel::parallel_balanced)
{
if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
>= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n
&& __gnu_parallel::is_parallel(parallelism_tag)))
&& __gnu_parallel::__is_parallel(__parallelism_tag)))
{
bool dummy = true;
typedef __gnu_parallel::iterator_pair<InputIterator, OutputIterator,
random_access_iterator_tag> ip;
*result = *begin;
ip begin_pair(begin + 1, result + 1),
end_pair(end, result + (end - begin));
__gnu_parallel::adjacent_difference_selector<ip> functionality;
bool __dummy = true;
typedef __gnu_parallel::_IteratorPair<_IIter, _OutputIterator,
random_access_iterator_tag> _ItTrip;
*__result = *__begin;
_ItTrip begin_pair(__begin + 1, __result + 1),
end_pair(__end, __result + (__end - __begin));
__gnu_parallel::__adjacent_difference_selector<_ItTrip> __functionality;
__gnu_parallel::
for_each_template_random_access_ed(begin_pair, end_pair, bin_op,
functionality,
__gnu_parallel::dummy_reduct(),
dummy, dummy, -1);
return functionality.finish_iterator;
for_each_template_random_access_ed(begin_pair, end_pair, __bin_op,
__functionality,
__gnu_parallel::_DummyReduct(),
__dummy, __dummy, -1);
return __functionality.finish_iterator;
}
else
return adjacent_difference(begin, end, result, bin_op,
return adjacent_difference(__begin, __end, __result, __bin_op,
__gnu_parallel::sequential_tag());
}
// Public interface.
template<typename InputIterator, typename OutputIterator>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result,
__gnu_parallel::_Parallelism parallelism_tag)
template<typename _IIter, typename _OutputIterator>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
return adjacent_difference(begin, end, result, std::minus<value_type>(),
parallelism_tag);
typedef iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
return adjacent_difference(__begin, __end, __result, std::minus<_ValueType>(),
__parallelism_tag);
}
template<typename InputIterator, typename OutputIterator>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result)
template<typename _IIter, typename _OutputIterator>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result)
{
typedef iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
return adjacent_difference(begin, end, result, std::minus<value_type>());
typedef iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
return adjacent_difference(__begin, __end, __result, std::minus<_ValueType>());
}
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation binary_op,
__gnu_parallel::_Parallelism parallelism_tag)
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __binary_op,
__gnu_parallel::_Parallelism __parallelism_tag)
{
typedef iterator_traits<InputIterator> traitsi_type;
typedef typename traitsi_type::iterator_category iteratori_category;
typedef iterator_traits<_IIter> traitsi_type;
typedef typename traitsi_type::iterator_category _IIteratorCategory;
typedef iterator_traits<OutputIterator> traitso_type;
typedef typename traitso_type::iterator_category iteratoro_category;
typedef iterator_traits<_OutputIterator> _OTraitsType;
typedef typename _OTraitsType::iterator_category _OIterCategory;
return adjacent_difference_switch(begin, end, result, binary_op,
iteratori_category(),
iteratoro_category(), parallelism_tag);
return __adjacent_difference_switch(__begin, __end, __result, __binary_op,
_IIteratorCategory(),
_OIterCategory(), __parallelism_tag);
}
template<typename InputIterator, typename OutputIterator,
typename BinaryOperation>
inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation binary_op)
template<typename _IIter, typename _OutputIterator,
typename _BinaryOperation>
inline _OutputIterator
adjacent_difference(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __binary_op)
{
typedef iterator_traits<InputIterator> traitsi_type;
typedef typename traitsi_type::iterator_category iteratori_category;
typedef iterator_traits<_IIter> traitsi_type;
typedef typename traitsi_type::iterator_category _IIteratorCategory;
typedef iterator_traits<OutputIterator> traitso_type;
typedef typename traitso_type::iterator_category iteratoro_category;
typedef iterator_traits<_OutputIterator> _OTraitsType;
typedef typename _OTraitsType::iterator_category _OIterCategory;
return adjacent_difference_switch(begin, end, result, binary_op,
iteratori_category(),
iteratoro_category());
return __adjacent_difference_switch(__begin, __end, __result, __binary_op,
_IIteratorCategory(),
_OIterCategory());
}
} // end namespace
} // end namespace

View File

@ -52,7 +52,7 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _Tag>
_Tp
accumulate_switch(_IIter, _IIter, _Tp, _Tag);
__accumulate_switch(_IIter, _IIter, _Tp, _Tag);
template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp
@ -71,13 +71,13 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _BinaryOper,
typename _Tag>
_Tp
accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
__accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
template<typename _RAIter, typename _Tp, typename _BinaryOper>
_Tp
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
__accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _OIter>
@ -111,15 +111,15 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
_OIter
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
_Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp>
@ -157,7 +157,7 @@ namespace __parallel
template<typename _RAIter1, typename _RAIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
_Tp
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
__inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism
@ -167,7 +167,7 @@ namespace __parallel
typename _BinaryFunction1, typename _BinaryFunction2,
typename _Tag1, typename _Tag2>
_Tp
inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
__inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, _Tag1, _Tag2);
@ -182,7 +182,7 @@ namespace __parallel
template<typename _IIter, typename _OIter>
_OIter
partial_sum(_IIter, _IIter, _OIter result);
partial_sum(_IIter, _IIter, _OIter __result);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
@ -191,11 +191,11 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
_OIter
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag);
} // end namespace
} // end namespace

View File

@ -44,73 +44,73 @@ namespace __gnu_parallel
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, etc.).
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already
* processed elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already
* processed __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_omp_loop(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r, Result base,
Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type
difference_type;
std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
difference_type length = end - begin;
thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
Result *thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = new Result[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result();
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] =
r(thread_results[iam], f(o, begin+pos));
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos));
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
return o;
return __o;
}
} // end namespace

View File

@ -44,72 +44,72 @@ namespace __gnu_parallel
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop with static scheduling.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...).
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already processed
* elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already processed
* __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r,
Result base, Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop_static(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type
difference_type;
std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
difference_type length = end - begin;
thread_index_t num_threads =
std::min<difference_type>(get_max_threads(), length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __length);
Result *thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = new Result[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result();
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __r(__thread_results[__iam], __f(__o, __begin+__pos));
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
return o;
return __o;
}
} // end namespace

View File

@ -45,89 +45,89 @@ namespace __gnu_parallel
* iterators, using hand-crafted parallelization by equal splitting
* the work.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...)
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already
* processed elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already
* processed __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_ed(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r, Result base,
Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_ed(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
const difference_type length = end - begin;
Result *thread_results;
bool* constructed;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
const _DifferenceType __length = __end - __begin;
_Result *__thread_results;
bool* __constructed;
thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length);
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = static_cast<Result*>(
::operator new(num_threads * sizeof(Result)));
constructed = new bool[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads];
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Neutral element.
Result* reduct = static_cast<Result*>(::operator new(sizeof(Result)));
_Result* __reduct = static_cast<_Result*>(::operator new(sizeof(_Result)));
difference_type
start = equally_split_point(length, num_threads, iam),
stop = equally_split_point(length, num_threads, iam + 1);
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
__stop = equally_split_point(__length, __num_threads, __iam + 1);
if (start < stop)
if (__start < __stop)
{
new(reduct) Result(f(o, begin + start));
++start;
constructed[iam] = true;
new(__reduct) _Result(__f(__o, __begin + __start));
++__start;
__constructed[__iam] = true;
}
else
constructed[iam] = false;
__constructed[__iam] = false;
for (; start < stop; ++start)
*reduct = r(*reduct, f(o, begin + start));
for (; __start < __stop; ++__start)
*__reduct = __r(*__reduct, __f(__o, __begin + __start));
thread_results[iam] = *reduct;
__thread_results[__iam] = *__reduct;
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
if (constructed[i])
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
if (__constructed[__i])
__output = __r(__output, __thread_results[__i]);
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
delete[] thread_results;
delete[] constructed;
delete[] __thread_results;
delete[] __constructed;
return o;
return __o;
}
} // end namespace

View File

@ -23,8 +23,8 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/partial_sum.h
* @brief Parallel implementation of std::partial_sum(), i. e. prefix
* sums.
* @brief Parallel implementation of std::partial_sum(), i.e. prefix
* sums.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
@ -44,175 +44,175 @@ namespace __gnu_parallel
// Problem: there is no 0-element given.
/** @brief Base case prefix sum routine.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param result Begin iterator of output sequence.
* @param bin_op Associative binary function.
* @param value Start value. Must be passed since the neutral
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __value Start value. Must be passed since the neutral
* element is unknown in general.
* @return End iterator of output sequence. */
template<typename InputIterator,
typename OutputIterator,
typename BinaryOperation>
OutputIterator
parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
typename std::iterator_traits
<InputIterator>::value_type value)
<_IIter>::value_type __value)
{
if (begin == end)
return result;
if (__begin == __end)
return __result;
while (begin != end)
while (__begin != __end)
{
value = bin_op(value, *begin);
*result = value;
++result;
++begin;
__value = __bin_op(__value, *__begin);
*__result = __value;
++__result;
++__begin;
}
return result;
return __result;
}
/** @brief Parallel partial sum implementation, two-phase approach,
no recursion.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param result Begin iterator of output sequence.
* @param bin_op Associative binary function.
* @param n Length of sequence.
* @param num_threads Number of threads to use.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @return End iterator of output sequence.
*/
template<typename InputIterator,
typename OutputIterator,
typename BinaryOperation>
OutputIterator
parallel_partial_sum_linear(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op,
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
typename std::iterator_traits
<InputIterator>::difference_type n)
<_IIter>::difference_type __n)
{
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (begin == end)
return result;
if (__begin == __end)
return __result;
thread_index_t num_threads =
std::min<difference_type>(get_max_threads(), n - 1);
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __n - 1);
if (num_threads < 2)
if (__num_threads < 2)
{
*result = *begin;
return parallel_partial_sum_basecase(
begin + 1, end, result + 1, bin_op, *begin);
*__result = *__begin;
return __parallel_partial_sum_basecase(
__begin + 1, __end, __result + 1, __bin_op, *__begin);
}
difference_type* borders;
value_type* sums;
_DifferenceType* __borders;
_ValueType* __sums;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
__num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2];
__borders = new _DifferenceType[__num_threads + 2];
if (__s.partial_sum_dilation == 1.0f)
equally_split(n, num_threads + 1, borders);
equally_split(__n, __num_threads + 1, __borders);
else
{
difference_type chunk_length =
((double)n
/ ((double)num_threads + __s.partial_sum_dilation)),
borderstart = n - num_threads * chunk_length;
borders[0] = 0;
for (int i = 1; i < (num_threads + 1); ++i)
_DifferenceType __chunk_length =
((double)__n
/ ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length;
__borders[0] = 0;
for (int __i = 1; __i < (__num_threads + 1); ++__i)
{
borders[i] = borderstart;
borderstart += chunk_length;
__borders[__i] = __borderstart;
__borderstart += __chunk_length;
}
borders[num_threads + 1] = n;
__borders[__num_threads + 1] = __n;
}
sums = static_cast<value_type*>(::operator new(sizeof(value_type)
* num_threads));
OutputIterator target_end;
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
* __num_threads));
_OutputIterator __target_end;
} //single
thread_index_t iam = omp_get_thread_num();
if (iam == 0)
_ThreadIndex __iam = omp_get_thread_num();
if (__iam == 0)
{
*result = *begin;
parallel_partial_sum_basecase(begin + 1, begin + borders[1],
result + 1, bin_op, *begin);
::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
*__result = *__begin;
__parallel_partial_sum_basecase(__begin + 1, __begin + __borders[1],
__result + 1, __bin_op, *__begin);
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
}
else
{
::new(&(sums[iam]))
value_type(std::accumulate(begin + borders[iam] + 1,
begin + borders[iam + 1],
*(begin + borders[iam]),
bin_op,
::new(&(__sums[__iam]))
_ValueType(std::accumulate(__begin + __borders[__iam] + 1,
__begin + __borders[__iam + 1],
*(__begin + __borders[__iam]),
__bin_op,
__gnu_parallel::sequential_tag()));
}
# pragma omp barrier
# pragma omp single
parallel_partial_sum_basecase(
sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
__parallel_partial_sum_basecase(
__sums + 1, __sums + __num_threads, __sums + 1, __bin_op, __sums[0]);
# pragma omp barrier
// Still same team.
parallel_partial_sum_basecase(begin + borders[iam + 1],
begin + borders[iam + 2],
result + borders[iam + 1], bin_op,
sums[iam]);
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
__begin + __borders[__iam + 2],
__result + __borders[__iam + 1], __bin_op,
__sums[__iam]);
} //parallel
::operator delete(sums);
delete[] borders;
::operator delete(__sums);
delete[] __borders;
return result + n;
return __result + __n;
}
/** @brief Parallel partial sum front-end.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param result Begin iterator of output sequence.
* @param bin_op Associative binary function.
/** @brief Parallel partial sum front-__end.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @return End iterator of output sequence. */
template<typename InputIterator,
typename OutputIterator,
typename BinaryOperation>
OutputIterator
parallel_partial_sum(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op)
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op)
{
_GLIBCXX_CALL(begin - end)
_GLIBCXX_CALL(__begin - __end)
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
switch (_Settings::get().partial_sum_algorithm)
{
case LINEAR:
// Need an initial offset.
return parallel_partial_sum_linear(begin, end, result, bin_op, n);
// Need an initial __offset.
return __parallel_partial_sum_linear(__begin, __end, __result, __bin_op, __n);
default:
// Partial_sum algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
return result + n;
return __result + __n;
}
}
}

View File

@ -45,231 +45,231 @@
namespace __gnu_parallel
{
/** @brief Parallel implementation of std::partition.
* @param begin Begin iterator of input sequence to split.
* @param end End iterator of input sequence to split.
* @param pred Partition predicate, possibly including some kind of pivot.
* @param num_threads Maximum number of threads to use for this task.
* @param __begin Begin iterator of input sequence to split.
* @param __end End iterator of input sequence to split.
* @param __pred Partition predicate, possibly including some kind of pivot.
* @param __num_threads Maximum number of threads to use for this task.
* @return Number of elements not fulfilling the predicate. */
template<typename RandomAccessIterator, typename Predicate>
typename std::iterator_traits<RandomAccessIterator>::difference_type
parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
Predicate pred, thread_index_t num_threads)
template<typename _RAIter, typename _Predicate>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_partition(_RAIter __begin, _RAIter __end,
_Predicate __pred, _ThreadIndex __num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
_GLIBCXX_CALL(n)
_GLIBCXX_CALL(__n)
const _Settings& __s = _Settings::get();
// Shared.
_GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
_GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
_GLIBCXX_VOLATILE difference_type leftnew, rightnew;
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1;
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right;
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew;
bool* reserved_left = NULL, * reserved_right = NULL;
bool* __reserved_left = NULL, * __reserved_right = NULL;
difference_type chunk_size;
_DifferenceType __chunk_size;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
//at least two chunks per thread
if(right - left + 1 >= 2 * num_threads * chunk_size)
# pragma omp parallel num_threads(num_threads)
//at least two __chunks per thread
if(__right - __left + 1 >= 2 * __num_threads * __chunk_size)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
reserved_left = new bool[num_threads];
reserved_right = new bool[num_threads];
__num_threads = omp_get_num_threads();
__reserved_left = new bool[__num_threads];
__reserved_right = new bool[__num_threads];
if (__s.partition_chunk_share > 0.0)
chunk_size = std::max<difference_type>(__s.partition_chunk_size,
(double)n * __s.partition_chunk_share
/ (double)num_threads);
__chunk_size = std::max<_DifferenceType>(__s.partition_chunk_size,
(double)__n * __s.partition_chunk_share
/ (double)__num_threads);
else
chunk_size = __s.partition_chunk_size;
__chunk_size = __s.partition_chunk_size;
}
while (right - left + 1 >= 2 * num_threads * chunk_size)
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
{
# pragma omp single
{
difference_type num_chunks = (right - left + 1) / chunk_size;
_DifferenceType __num_chunks = (__right - __left + 1) / __chunk_size;
for (int r = 0; r < num_threads; ++r)
for (int __r = 0; __r < __num_threads; ++__r)
{
reserved_left[r] = false;
reserved_right[r] = false;
__reserved_left[__r] = false;
__reserved_right[__r] = false;
}
leftover_left = 0;
leftover_right = 0;
__leftover_left = 0;
__leftover_right = 0;
} //implicit barrier
// Private.
difference_type thread_left, thread_left_border,
thread_right, thread_right_border;
thread_left = left + 1;
_DifferenceType __thread_left, __thread_left_border,
thread_right, __thread_right_border;
__thread_left = __left + 1;
// Just to satisfy the condition below.
thread_left_border = thread_left - 1;
thread_right = n - 1;
thread_right_border = thread_right + 1;
__thread_left_border = __thread_left - 1;
thread_right = __n - 1;
__thread_right_border = thread_right + 1;
bool iam_finished = false;
while (!iam_finished)
bool __iam_finished = false;
while (!__iam_finished)
{
if (thread_left > thread_left_border)
if (__thread_left > __thread_left_border)
{
omp_set_lock(&result_lock);
if (left + (chunk_size - 1) > right)
iam_finished = true;
omp_set_lock(&__result_lock);
if (__left + (__chunk_size - 1) > __right)
__iam_finished = true;
else
{
thread_left = left;
thread_left_border = left + (chunk_size - 1);
left += chunk_size;
__thread_left = __left;
__thread_left_border = __left + (__chunk_size - 1);
__left += __chunk_size;
}
omp_unset_lock(&result_lock);
omp_unset_lock(&__result_lock);
}
if (thread_right < thread_right_border)
if (thread_right < __thread_right_border)
{
omp_set_lock(&result_lock);
if (left > right - (chunk_size - 1))
iam_finished = true;
omp_set_lock(&__result_lock);
if (__left > __right - (__chunk_size - 1))
__iam_finished = true;
else
{
thread_right = right;
thread_right_border = right - (chunk_size - 1);
right -= chunk_size;
thread_right = __right;
__thread_right_border = __right - (__chunk_size - 1);
__right -= __chunk_size;
}
omp_unset_lock(&result_lock);
omp_unset_lock(&__result_lock);
}
if (iam_finished)
if (__iam_finished)
break;
// Swap as usual.
while (thread_left < thread_right)
while (__thread_left < thread_right)
{
while (pred(begin[thread_left])
&& thread_left <= thread_left_border)
++thread_left;
while (!pred(begin[thread_right])
&& thread_right >= thread_right_border)
while (__pred(__begin[__thread_left])
&& __thread_left <= __thread_left_border)
++__thread_left;
while (!__pred(__begin[thread_right])
&& thread_right >= __thread_right_border)
--thread_right;
if (thread_left > thread_left_border
|| thread_right < thread_right_border)
// Fetch new chunk(s).
if (__thread_left > __thread_left_border
|| thread_right < __thread_right_border)
// Fetch new chunk(__s).
break;
std::swap(begin[thread_left], begin[thread_right]);
++thread_left;
std::swap(__begin[__thread_left], __begin[thread_right]);
++__thread_left;
--thread_right;
}
}
// Now swap the leftover chunks to the right places.
if (thread_left <= thread_left_border)
if (__thread_left <= __thread_left_border)
# pragma omp atomic
++leftover_left;
if (thread_right >= thread_right_border)
++__leftover_left;
if (thread_right >= __thread_right_border)
# pragma omp atomic
++leftover_right;
++__leftover_right;
# pragma omp barrier
# pragma omp single
{
leftnew = left - leftover_left * chunk_size;
rightnew = right + leftover_right * chunk_size;
__leftnew = __left - __leftover_left * __chunk_size;
__rightnew = __right + __leftover_right * __chunk_size;
}
# pragma omp barrier
// <=> thread_left_border + (chunk_size - 1) >= leftnew
if (thread_left <= thread_left_border
&& thread_left_border >= leftnew)
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
if (__thread_left <= __thread_left_border
&& __thread_left_border >= __leftnew)
{
// Chunk already in place, reserve spot.
reserved_left[(left - (thread_left_border + 1)) / chunk_size]
__reserved_left[(__left - (__thread_left_border + 1)) / __chunk_size]
= true;
}
// <=> thread_right_border - (chunk_size - 1) <= rightnew
if (thread_right >= thread_right_border
&& thread_right_border <= rightnew)
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
if (thread_right >= __thread_right_border
&& __thread_right_border <= __rightnew)
{
// Chunk already in place, reserve spot.
reserved_right[((thread_right_border - 1) - right)
/ chunk_size] = true;
__reserved_right[((__thread_right_border - 1) - __right)
/ __chunk_size] = true;
}
# pragma omp barrier
if (thread_left <= thread_left_border
&& thread_left_border < leftnew)
if (__thread_left <= __thread_left_border
&& __thread_left_border < __leftnew)
{
// Find spot and swap.
difference_type swapstart = -1;
omp_set_lock(&result_lock);
for (int r = 0; r < leftover_left; ++r)
if (!reserved_left[r])
_DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock);
for (int __r = 0; __r < __leftover_left; ++__r)
if (!__reserved_left[__r])
{
reserved_left[r] = true;
swapstart = left - (r + 1) * chunk_size;
__reserved_left[__r] = true;
__swapstart = __left - (__r + 1) * __chunk_size;
break;
}
omp_unset_lock(&result_lock);
omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif
std::swap_ranges(begin + thread_left_border
- (chunk_size - 1),
begin + thread_left_border + 1,
begin + swapstart);
std::swap_ranges(__begin + __thread_left_border
- (__chunk_size - 1),
__begin + __thread_left_border + 1,
__begin + __swapstart);
}
if (thread_right >= thread_right_border
&& thread_right_border > rightnew)
if (thread_right >= __thread_right_border
&& __thread_right_border > __rightnew)
{
// Find spot and swap
difference_type swapstart = -1;
omp_set_lock(&result_lock);
for (int r = 0; r < leftover_right; ++r)
if (!reserved_right[r])
_DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock);
for (int __r = 0; __r < __leftover_right; ++__r)
if (!__reserved_right[__r])
{
reserved_right[r] = true;
swapstart = right + r * chunk_size + 1;
__reserved_right[__r] = true;
__swapstart = __right + __r * __chunk_size + 1;
break;
}
omp_unset_lock(&result_lock);
omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif
std::swap_ranges(begin + thread_right_border,
begin + thread_right_border + chunk_size,
begin + swapstart);
std::swap_ranges(__begin + __thread_right_border,
__begin + __thread_right_border + __chunk_size,
__begin + __swapstart);
}
#if _GLIBCXX_ASSERTIONS
# pragma omp barrier
# pragma omp single
{
for (int r = 0; r < leftover_left; ++r)
_GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
for (int r = 0; r < leftover_right; ++r)
_GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
for (int __r = 0; __r < __leftover_left; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]);
for (int __r = 0; __r < __leftover_right; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]);
}
# pragma omp barrier
@ -277,149 +277,149 @@ template<typename RandomAccessIterator, typename Predicate>
# pragma omp barrier
left = leftnew;
right = rightnew;
__left = __leftnew;
__right = __rightnew;
}
# pragma omp flush(left, right)
# pragma omp flush(__left, __right)
} // end "recursion" //parallel
difference_type final_left = left, final_right = right;
_DifferenceType __final_left = __left, __final_right = __right;
while (final_left < final_right)
while (__final_left < __final_right)
{
// Go right until key is geq than pivot.
while (pred(begin[final_left]) && final_left < final_right)
++final_left;
while (__pred(__begin[__final_left]) && __final_left < __final_right)
++__final_left;
// Go left until key is less than pivot.
while (!pred(begin[final_right]) && final_left < final_right)
--final_right;
while (!__pred(__begin[__final_right]) && __final_left < __final_right)
--__final_right;
if (final_left == final_right)
if (__final_left == __final_right)
break;
std::swap(begin[final_left], begin[final_right]);
++final_left;
--final_right;
std::swap(__begin[__final_left], __begin[__final_right]);
++__final_left;
--__final_right;
}
// All elements on the left side are < piv, all elements on the
// right are >= piv
delete[] reserved_left;
delete[] reserved_right;
delete[] __reserved_left;
delete[] __reserved_right;
omp_destroy_lock(&result_lock);
omp_destroy_lock(&__result_lock);
// Element "between" final_left and final_right might not have
// Element "between" __final_left and __final_right might not have
// been regarded yet
if (final_left < n && !pred(begin[final_left]))
if (__final_left < __n && !__pred(__begin[__final_left]))
// Really swapped.
return final_left;
return __final_left;
else
return final_left + 1;
return __final_left + 1;
}
/**
* @brief Parallel implementation of std::nth_element().
* @param begin Begin iterator of input sequence.
* @param nth Iterator of element that must be in position afterwards.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __nth _Iterator of element that must be in position afterwards.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
RandomAccessIterator end, Comparator comp)
parallel_nth_element(_RAIter __begin, _RAIter __nth,
_RAIter __end, _Compare __comp)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
RandomAccessIterator split;
random_number rng;
_RAIter __split;
_RandomNumber __rng;
difference_type minimum_length =
std::max<difference_type>(2, _Settings::get().partition_minimal_n);
_DifferenceType minimum_length =
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
// Break if input range to small.
while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
while (static_cast<_SequenceIndex>(__end - __begin) >= minimum_length)
{
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
RandomAccessIterator pivot_pos = begin + rng(n);
_RAIter __pivot_pos = __begin + __rng(__n);
// Swap pivot_pos value to end.
if (pivot_pos != (end - 1))
std::swap(*pivot_pos, *(end - 1));
pivot_pos = end - 1;
// Swap __pivot_pos value to end.
if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1;
// XXX Comparator must have first_value_type, second_value_type,
// result_type
// Comparator == __gnu_parallel::lexicographic<S, int,
// __gnu_parallel::less<S, S> >
// pivot_pos == std::pair<S, int>*
// XXX binder2nd only for RandomAccessIterators??
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, *pivot_pos);
// XXX _Compare must have first__ValueType, second__ValueType,
// _ResultType
// _Compare == __gnu_parallel::_Lexicographic<S, int,
// __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>*
// XXX binder2nd only for _RAIters??
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
RandomAccessIterator split_pos1, split_pos2;
split_pos1 = begin + parallel_partition(begin, end - 1, pred,
get_max_threads());
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1, __pred,
__get_max_threads());
// Left side: < pivot_pos; right side: >= pivot_pos
// Left side: < __pivot_pos; __right side: >= __pivot_pos
// Swap pivot back to middle.
if (split_pos1 != pivot_pos)
std::swap(*split_pos1, *pivot_pos);
pivot_pos = split_pos1;
if (__split_pos1 != __pivot_pos)
std::swap(*__split_pos1, *__pivot_pos);
__pivot_pos = __split_pos1;
// In case all elements are equal, split_pos1 == 0
if ((split_pos1 + 1 - begin) < (n >> 7)
|| (end - split_pos1) < (n >> 7))
// In case all elements are equal, __split_pos1 == 0
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7))
{
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::unary_negate<__gnu_parallel::
binder1st<Comparator, value_type, value_type, bool>, value_type>
pred(__gnu_parallel::binder1st<Comparator, value_type,
value_type, bool>(comp, *pivot_pos));
__gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
// Find other end of pivot-equal range.
split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
end, pred);
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
}
else
// Only skip the pivot.
split_pos2 = split_pos1 + 1;
__split_pos2 = __split_pos1 + 1;
// Compare iterators.
if (split_pos2 <= nth)
begin = split_pos2;
else if (nth < split_pos1)
end = split_pos1;
if (__split_pos2 <= __nth)
__begin = __split_pos2;
else if (__nth < __split_pos1)
__end = __split_pos1;
else
break;
}
// Only at most _Settings::partition_minimal_n elements left.
__gnu_sequential::sort(begin, end, comp);
// Only at most _Settings::partition_minimal_n __elements __left.
__gnu_sequential::sort(__begin, __end, __comp);
}
/** @brief Parallel implementation of std::partial_sort().
* @param begin Begin iterator of input sequence.
* @param middle Sort until this position.
* @param end End iterator of input sequence.
* @param comp Comparator. */
template<typename RandomAccessIterator, typename Comparator>
* @param __begin Begin iterator of input sequence.
* @param __middle Sort until this position.
* @param __end End iterator of input sequence.
* @param __comp Comparator. */
template<typename _RAIter, typename _Compare>
void
parallel_partial_sort(RandomAccessIterator begin,
RandomAccessIterator middle,
RandomAccessIterator end, Comparator comp)
parallel_partial_sort(_RAIter __begin,
_RAIter __middle,
_RAIter __end, _Compare __comp)
{
parallel_nth_element(begin, middle, end, comp);
std::sort(begin, middle, comp);
parallel_nth_element(__begin, __middle, __end, __comp);
std::sort(__begin, __middle, __comp);
}
} //namespace __gnu_parallel

View File

@ -45,99 +45,99 @@ namespace __gnu_parallel
* atomic access. push_front() and pop_front() must not be called
* concurrently to each other, while pop_back() can be called
* concurrently at all times.
* @c empty(), @c size(), and @c top() are intentionally not provided.
* @__c empty(), @__c size(), and @__c top() are intentionally not provided.
* Calling them would not make sense in a concurrent setting.
* @param T Contained element type. */
template<typename T>
class RestrictedBoundedConcurrentQueue
* @param _Tp Contained element type. */
template<typename _Tp>
class _RestrictedBoundedConcurrentQueue
{
private:
/** @brief Array of elements, seen as cyclic buffer. */
T* base;
_Tp* _M_base;
/** @brief Maximal number of elements contained at the same time. */
sequence_index_t max_size;
_SequenceIndex _M_max_size;
/** @brief Cyclic begin and end pointers contained in one
/** @brief Cyclic __begin and __end pointers contained in one
atomically changeable value. */
_GLIBCXX_VOLATILE lcas_t borders;
_GLIBCXX_VOLATILE _CASable _M_borders;
public:
/** @brief Constructor. Not to be called concurrent, of course.
* @param max_size Maximal number of elements to be contained. */
RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
* @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
{
this->max_size = max_size;
base = new T[max_size];
borders = encode2(0, 0);
this->_M_max_size = _M_max_size;
_M_base = new _Tp[_M_max_size];
_M_borders = __encode2(0, 0);
#pragma omp flush
}
/** @brief Destructor. Not to be called concurrent, of course. */
~RestrictedBoundedConcurrentQueue()
{ delete[] base; }
~_RestrictedBoundedConcurrentQueue()
{ delete[] _M_base; }
/** @brief Pushes one element into the queue at the front end.
/** @brief Pushes one element into the queue at the front __end.
* Must not be called concurrently with pop_front(). */
void
push_front(const T& t)
push_front(const _Tp& __t)
{
lcas_t former_borders = borders;
int former_front, former_back;
decode2(former_borders, former_front, former_back);
*(base + former_front % max_size) = t;
_CASable __former_borders = _M_borders;
int __former_front, __former_back;
decode2(__former_borders, __former_front, __former_back);
*(_M_base + __former_front % _M_max_size) = __t;
#if _GLIBCXX_ASSERTIONS
// Otherwise: front - back > max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back)
<= max_size);
// Otherwise: front - back > _M_max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
<= _M_max_size);
#endif
fetch_and_add(&borders, encode2(1, 0));
__fetch_and_add(&_M_borders, __encode2(1, 0));
}
/** @brief Pops one element from the queue at the front end.
/** @brief Pops one element from the queue at the front __end.
* Must not be called concurrently with pop_front(). */
bool
pop_front(T& t)
pop_front(_Tp& __t)
{
int former_front, former_back;
int __former_front, __former_back;
#pragma omp flush
decode2(borders, former_front, former_back);
while (former_front > former_back)
decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back)
{
// Chance.
lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front - 1, former_back);
if (compare_and_swap(&borders, former_borders, new_borders))
_CASable __former_borders = __encode2(__former_front, __former_back);
_CASable __new_borders = __encode2(__former_front - 1, __former_back);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
{
t = *(base + (former_front - 1) % max_size);
__t = *(_M_base + (__former_front - 1) % _M_max_size);
return true;
}
#pragma omp flush
decode2(borders, former_front, former_back);
decode2(_M_borders, __former_front, __former_back);
}
return false;
}
/** @brief Pops one element from the queue at the front end.
/** @brief Pops one element from the queue at the front __end.
* Must not be called concurrently with pop_front(). */
bool
pop_back(T& t) //queue behavior
pop_back(_Tp& __t) //queue behavior
{
int former_front, former_back;
int __former_front, __former_back;
#pragma omp flush
decode2(borders, former_front, former_back);
while (former_front > former_back)
decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back)
{
// Chance.
lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front, former_back + 1);
if (compare_and_swap(&borders, former_borders, new_borders))
_CASable __former_borders = __encode2(__former_front, __former_back);
_CASable __new_borders = __encode2(__former_front, __former_back + 1);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
{
t = *(base + former_back % max_size);
__t = *(_M_base + __former_back % _M_max_size);
return true;
}
#pragma omp flush
decode2(borders, former_front, former_back);
decode2(_M_borders, __former_front, __former_back);
}
return false;
}

// --------------------------------------------------------------------
// (file boundary in scraped diff: include/parallel/quicksort.h follows)
namespace __gnu_parallel
{
/** @brief Unbalanced quicksort divide step.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param pivot_rank Desired rank of the pivot.
* @param num_samples Choose pivot from that many samples.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __pivot_rank Desired __rank of the pivot.
* @param __num_samples Choose pivot from that many samples.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
typename std::iterator_traits<RandomAccessIterator>::difference_type
parallel_sort_qs_divide(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp, typename std::iterator_traits
<RandomAccessIterator>::difference_type pivot_rank,
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin,
_RAIter __end,
_Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits
<RandomAccessIterator>::difference_type
num_samples, thread_index_t num_threads)
<_RAIter>::difference_type
__num_samples, _ThreadIndex __num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
num_samples = std::min(num_samples, n);
_DifferenceType __n = __end - __begin;
__num_samples = std::min(__num_samples, __n);
// Allocate uninitialized, to avoid default constructor.
value_type* samples =
static_cast<value_type*>(::operator new(num_samples
* sizeof(value_type)));
_ValueType* __samples =
static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType)));
for (difference_type s = 0; s < num_samples; ++s)
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{
const unsigned long long index = static_cast<unsigned long long>(s)
* n / num_samples;
::new(&(samples[s])) value_type(begin[index]);
const unsigned long long __index = static_cast<unsigned long long>(__s)
* __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]);
}
__gnu_sequential::sort(samples, samples + num_samples, comp);
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
value_type& pivot = samples[pivot_rank * num_samples / n];
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n];
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, pivot);
difference_type split =
parallel_partition(begin, end, pred, num_threads);
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, pivot);
_DifferenceType __split =
__parallel_partition(__begin, __end, __pred, __num_threads);
::operator delete(samples);
::operator delete(__samples);
return split;
return __split;
}
/** @brief Unbalanced quicksort conquer step.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_sort_qs_conquer(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
__parallel_sort_qs_conquer(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (num_threads <= 1)
if (__num_threads <= 1)
{
__gnu_sequential::sort(begin, end, comp);
__gnu_sequential::sort(__begin, __end, __comp);
return;
}
difference_type n = end - begin, pivot_rank;
_DifferenceType __n = __end - __begin, __pivot_rank;
if (n <= 1)
if (__n <= 1)
return;
thread_index_t num_threads_left;
_ThreadIndex __num_threads_left;
if ((num_threads % 2) == 1)
num_threads_left = num_threads / 2 + 1;
if ((__num_threads % 2) == 1)
__num_threads_left = __num_threads / 2 + 1;
else
num_threads_left = num_threads / 2;
__num_threads_left = __num_threads / 2;
pivot_rank = n * num_threads_left / num_threads;
__pivot_rank = __n * __num_threads_left / __num_threads;
difference_type split =
parallel_sort_qs_divide(begin, end, comp, pivot_rank,
_DifferenceType __split =
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset,
num_threads);
__num_threads);
#pragma omp parallel sections num_threads(2)
{
#pragma omp section
parallel_sort_qs_conquer(begin, begin + split,
comp, num_threads_left);
__parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left);
#pragma omp section
parallel_sort_qs_conquer(begin + split, end,
comp, num_threads - num_threads_left);
__parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left);
}
}
/** @brief Unbalanced quicksort main call.
* @param begin Begin iterator of input sequence.
* @param end End iterator input sequence, ignored.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of input sequence.
* @param __end End iterator input sequence, ignored.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_sort_qs(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
__parallel_sort_qs(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(n)
_GLIBCXX_CALL(__n)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
// At least one element per processor.
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
__parallel_sort_qs_conquer(__begin, __begin + __n, __comp, __num_threads);
}
} //namespace __gnu_parallel

// -----------------------------------------------------------------------
// (file boundary in scraped diff: include/parallel/random_number.h follows)
namespace __gnu_parallel
{
/** @brief Random number generator, based on the Mersenne twister. */
class random_number
class _RandomNumber
{
private:
std::tr1::mt19937 mt;
uint64 supremum;
uint64 RAND_SUP;
double supremum_reciprocal;
double RAND_SUP_REC;
std::tr1::mt19937 _M_mt;
uint64 _M_supremum;
uint64 _RAND_SUP;
double _M_supremum_reciprocal;
double _RAND_SUP_REC;
// Assumed to be twice as long as the usual random number.
uint64 cache;
uint64 __cache;
// Bit results.
int bits_left;
int __bits_left;
static uint32
scale_down(uint64 x,
__scale_down(uint64 __x,
#if _GLIBCXX_SCALE_DOWN_FPU
uint64 /*supremum*/, double supremum_reciprocal)
uint64 /*_M_supremum*/, double _M_supremum_reciprocal)
#else
uint64 supremum, double /*supremum_reciprocal*/)
uint64 _M_supremum, double /*_M_supremum_reciprocal*/)
#endif
{
#if _GLIBCXX_SCALE_DOWN_FPU
return uint32(x * supremum_reciprocal);
return uint32(__x * _M_supremum_reciprocal);
#else
return static_cast<uint32>(x % supremum);
return static_cast<uint32>(__x % _M_supremum);
#endif
}
public:
/** @brief Default constructor. Seed with 0. */
random_number()
: mt(0), supremum(0x100000000ULL),
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
RAND_SUP_REC(1.0 / double(RAND_SUP)),
cache(0), bits_left(0) { }
_RandomNumber()
: _M_mt(0), _M_supremum(0x100000000ULL),
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
__cache(0), __bits_left(0) { }
/** @brief Constructor.
* @param seed Random seed.
* @param supremum Generate integer random numbers in the
* interval @c [0,supremum). */
random_number(uint32 seed, uint64 supremum = 0x100000000ULL)
: mt(seed), supremum(supremum),
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
RAND_SUP_REC(1.0 / double(RAND_SUP)),
cache(0), bits_left(0) { }
* @param __seed Random __seed.
* @param _M_supremum Generate integer random numbers in the
* interval @__c [0,_M_supremum). */
_RandomNumber(uint32 __seed, uint64 _M_supremum = 0x100000000ULL)
: _M_mt(__seed), _M_supremum(_M_supremum),
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
__cache(0), __bits_left(0) { }
/** @brief Generate unsigned random 32-bit integer. */
uint32
operator()()
{ return scale_down(mt(), supremum, supremum_reciprocal); }
{ return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }
/** @brief Generate unsigned random 32-bit integer in the
interval @c [0,local_supremum). */
interval @__c [0,local_supremum). */
uint32
operator()(uint64 local_supremum)
{
return scale_down(mt(), local_supremum,
double(local_supremum * RAND_SUP_REC));
return __scale_down(_M_mt(), local_supremum,
double(local_supremum * _RAND_SUP_REC));
}
/** @brief Generate a number of random bits, run-time parameter.
* @param bits Number of bits to generate. */
unsigned long
genrand_bits(int bits)
__genrand_bits(int bits)
{
unsigned long res = cache & ((1 << bits) - 1);
cache = cache >> bits;
bits_left -= bits;
if (bits_left < 32)
unsigned long __res = __cache & ((1 << bits) - 1);
__cache = __cache >> bits;
__bits_left -= bits;
if (__bits_left < 32)
{
cache |= ((uint64(mt())) << bits_left);
bits_left += 32;
__cache |= ((uint64(_M_mt())) << __bits_left);
__bits_left += 32;
}
return res;
return __res;
}
};

// ------------------------------------------------------------------------
// (file boundary in scraped diff: include/parallel/random_shuffle.h follows)
{
/** @brief Type to hold the index of a bin.
 *
 *  Since many variables of this type are allocated, it should be
 *  chosen as small as possible.
 */
typedef unsigned short _BinIndex;
/** @brief Data known to every thread participating in
__gnu_parallel::parallel_random_shuffle(). */
template<typename RandomAccessIterator>
struct DRandomShufflingGlobalData
__gnu_parallel::__parallel_random_shuffle(). */
template<typename _RAIter>
struct _DRandomShufflingGlobalData
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Begin iterator of the source. */
RandomAccessIterator& source;
/** @brief Begin iterator of the _M_source. */
_RAIter& _M_source;
/** @brief Temporary arrays for each thread. */
value_type** temporaries;
_ValueType** _M_temporaries;
/** @brief Two-dimensional array to hold the thread-bin distribution.
*
* Dimensions (num_threads + 1) x (num_bins + 1). */
difference_type** dist;
* Dimensions (__num_threads + 1) __x (_M_num_bins + 1). */
_DifferenceType** _M_dist;
/** @brief Start indexes of the threads' chunks. */
difference_type* starts;
/** @brief Start indexes of the threads' __chunks. */
_DifferenceType* _M_starts;
/** @brief Number of the thread that will further process the
corresponding bin. */
thread_index_t* bin_proc;
_ThreadIndex* _M_bin_proc;
/** @brief Number of bins to distribute to. */
int num_bins;
int _M_num_bins;
/** @brief Number of bits needed to address the bins. */
int num_bits;
int _M_num_bits;
/** @brief Constructor. */
DRandomShufflingGlobalData(RandomAccessIterator& _source)
: source(_source) { }
_DRandomShufflingGlobalData(_RAIter& _source)
: _M_source(_source) { }
};
/** @brief Local data for a thread participating in
__gnu_parallel::parallel_random_shuffle().
__gnu_parallel::__parallel_random_shuffle().
*/
template<typename RandomAccessIterator, typename RandomNumberGenerator>
struct DRSSorterPU
template<typename _RAIter, typename RandomNumberGenerator>
struct _DRSSorterPU
{
/** @brief Number of threads participating in total. */
int num_threads;
int __num_threads;
/** @brief Begin index for bins taken care of by this thread. */
bin_index bins_begin;
/** @brief Begin __index for bins taken care of by this thread. */
_BinIndex _M_bins_begin;
/** @brief End index for bins taken care of by this thread. */
bin_index bins_end;
/** @brief End __index for bins taken care of by this thread. */
_BinIndex __bins_end;
/** @brief Random seed for this thread. */
uint32 seed;
/** @brief Random _M_seed for this thread. */
uint32 _M_seed;
/** @brief Pointer to global data. */
DRandomShufflingGlobalData<RandomAccessIterator>* sd;
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
};
/** @brief Generate a random number in @c [0,2^__logp).
 *  @param __logp Logarithm (basis 2) of the upper range bound.
 *  @param __rng Random number generator to use.
 *  @return Uniform random value with @c __logp random bits. */
template<typename RandomNumberGenerator>
  inline int
  __random_number_pow2(int __logp, RandomNumberGenerator& __rng)
  { return __rng.__genrand_bits(__logp); }
/** @brief Random shuffle code executed by each thread.
* @param pus Array of thread-local data records. */
template<typename RandomAccessIterator, typename RandomNumberGenerator>
* @param __pus Array of thread-local data records. */
template<typename _RAIter, typename RandomNumberGenerator>
void
parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
RandomNumberGenerator>* pus)
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
RandomNumberGenerator>* __pus)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
thread_index_t iam = omp_get_thread_num();
DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
_ThreadIndex __iam = omp_get_thread_num();
_DRSSorterPU<_RAIter, RandomNumberGenerator>* d = &__pus[__iam];
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
// Indexing: dist[bin][processor]
difference_type length = sd->starts[iam + 1] - sd->starts[iam];
bin_index* oracles = new bin_index[length];
difference_type* dist = new difference_type[sd->num_bins + 1];
bin_index* bin_proc = new bin_index[sd->num_bins];
value_type** temporaries = new value_type*[d->num_threads];
// Indexing: _M_dist[bin][processor]
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] - _M_sd->_M_starts[__iam];
_BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
_ValueType** _M_temporaries = new _ValueType*[d->__num_threads];
// Compute oracles and count appearances.
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
dist[b] = 0;
int num_bits = sd->num_bits;
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = 0;
int _M_num_bits = _M_sd->_M_num_bits;
random_number rng(d->seed);
_RandomNumber __rng(d->_M_seed);
// First main loop.
for (difference_type i = 0; i < length; ++i)
for (_DifferenceType __i = 0; __i < __length; ++__i)
{
bin_index oracle = random_number_pow2(num_bits, rng);
oracles[i] = oracle;
_BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
++(dist[oracle + 1]);
++(_M_dist[__oracle + 1]);
}
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
sd->dist[b][iam + 1] = dist[b];
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b];
# pragma omp barrier
# pragma omp single
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
// total number of items in bin s
for (bin_index s = 0; s < sd->num_bins; ++s)
__gnu_sequential::partial_sum(sd->dist[s + 1],
sd->dist[s + 1] + d->num_threads + 1,
sd->dist[s + 1]);
// Sum up bins, _M_sd->_M_dist[__s + 1][d->__num_threads] now contains the
// total number of items in bin __s
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(_M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1] + d->__num_threads + 1,
_M_sd->_M_dist[__s + 1]);
}
# pragma omp barrier
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; ++s)
global_offset += sd->dist[s + 1][d->num_threads];
_SequenceIndex __offset = 0, __global_offset = 0;
for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s)
__global_offset += _M_sd->_M_dist[__s + 1][d->__num_threads];
# pragma omp barrier
for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
{
for (int t = 0; t < d->num_threads + 1; ++t)
sd->dist[s + 1][t] += offset;
offset = sd->dist[s + 1][d->num_threads];
for (int __t = 0; __t < d->__num_threads + 1; ++__t)
_M_sd->_M_dist[__s + 1][__t] += __offset;
__offset = _M_sd->_M_dist[__s + 1][d->__num_threads];
}
sd->temporaries[iam] = static_cast<value_type*>(
::operator new(sizeof(value_type) * offset));
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * __offset));
# pragma omp barrier
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
dist[b] = sd->dist[b][iam];
for (bin_index b = 0; b < sd->num_bins; ++b)
bin_proc[b] = sd->bin_proc[b];
for (thread_index_t t = 0; t < d->num_threads; ++t)
temporaries[t] = sd->temporaries[t];
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = _M_sd->_M_dist[__b][__iam];
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b)
_M_bin_proc[__b] = _M_sd->_M_bin_proc[__b];
for (_ThreadIndex __t = 0; __t < d->__num_threads; ++__t)
_M_temporaries[__t] = _M_sd->_M_temporaries[__t];
RandomAccessIterator source = sd->source;
difference_type start = sd->starts[iam];
_RAIter _M_source = _M_sd->_M_source;
_DifferenceType __start = _M_sd->_M_starts[__iam];
// Distribute according to oracles, second main loop.
for (difference_type i = 0; i < length; ++i)
for (_DifferenceType __i = 0; __i < __length; ++__i)
{
bin_index target_bin = oracles[i];
thread_index_t target_p = bin_proc[target_bin];
_BinIndex target_bin = __oracles[__i];
_ThreadIndex target_p = _M_bin_proc[target_bin];
// Last column [d->num_threads] stays unchanged.
::new(&(temporaries[target_p][dist[target_bin + 1]++]))
value_type(*(source + i + start));
// Last column [d->__num_threads] stays unchanged.
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
_ValueType(*(_M_source + __i + __start));
}
delete[] oracles;
delete[] dist;
delete[] bin_proc;
delete[] temporaries;
delete[] __oracles;
delete[] _M_dist;
delete[] _M_bin_proc;
delete[] _M_temporaries;
# pragma omp barrier
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b)
{
value_type* begin =
sd->temporaries[iam] +
((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
* end =
sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
sequential_random_shuffle(begin, end, rng);
std::copy(begin, end, sd->source + global_offset +
((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
_ValueType* __begin =
_M_sd->_M_temporaries[__iam] +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->__num_threads]),
* __end =
_M_sd->_M_temporaries[__iam] + _M_sd->_M_dist[__b + 1][d->__num_threads];
__sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->__num_threads]));
}
::operator delete(sd->temporaries[iam]);
::operator delete(_M_sd->_M_temporaries[__iam]);
}
/** @brief Round up to the next greater power of 2.
 *  @param __x Integer to round up.
 *  @return Smallest power of 2 that is not less than @c __x
 *  (1 for all values <= 1). */
template<typename _Tp>
  _Tp
  __round_up_to_pow2(_Tp __x)
  {
    if (__x <= 1)
      return 1;
    else
      return (_Tp)1 << (__log2(__x - 1) + 1);
  }
/** @brief Main parallel random shuffle step.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param n Length of sequence.
* @param num_threads Number of threads to use.
* @param rng Random number generator to use.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @param __rng Random number generator to use.
*/
template<typename RandomAccessIterator, typename RandomNumberGenerator>
template<typename _RAIter, typename RandomNumberGenerator>
void
parallel_random_shuffle_drs(RandomAccessIterator begin,
RandomAccessIterator end,
__parallel_random_shuffle_drs(_RAIter __begin,
_RAIter __end,
typename std::iterator_traits
<RandomAccessIterator>::difference_type n,
thread_index_t num_threads,
RandomNumberGenerator& rng)
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads,
RandomNumberGenerator& __rng)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL(n)
_GLIBCXX_CALL(__n)
const _Settings& __s = _Settings::get();
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
bin_index num_bins, num_bins_cache;
_BinIndex _M_num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first.
// Must fit into L1.
num_bins_cache = std::max<difference_type>(
1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache);
__num_bins_cache = std::max<_DifferenceType>(
1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size.
num_bins = std::min<difference_type>(n, num_bins_cache);
_M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin.
num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
_M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
#endif
num_bins = round_up_to_pow2(num_bins);
_M_num_bins = __round_up_to_pow2(_M_num_bins);
if (num_bins < num_bins_cache)
if (_M_num_bins < __num_bins_cache)
{
#endif
// Now try the L2 cache
// Must fit into L2
num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
1, n / (__s.L2_cache_size / sizeof(value_type))));
num_bins_cache = round_up_to_pow2(num_bins_cache);
__num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>(
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2.
num_bins = static_cast<bin_index>(
std::min(n, static_cast<difference_type>(num_bins_cache)));
_M_num_bins = static_cast<_BinIndex>(
std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
// Power of 2 and at least one element per bin, at most the TLB size.
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin.
num_bins = std::min(
static_cast<difference_type>(__s.TLB_size / 2), num_bins);
_M_num_bins = std::min(
static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins);
#endif
num_bins = round_up_to_pow2(num_bins);
_M_num_bins = __round_up_to_pow2(_M_num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
num_threads = std::min<bin_index>(num_threads, num_bins);
__num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins);
if (num_threads <= 1)
return sequential_random_shuffle(begin, end, rng);
if (__num_threads <= 1)
return __sequential_random_shuffle(__begin, __end, __rng);
DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
DRSSorterPU<RandomAccessIterator, random_number >* pus;
difference_type* starts;
_DRandomShufflingGlobalData<_RAIter> _M_sd(__begin);
_DRSSorterPU<_RAIter, _RandomNumber >* __pus;
_DifferenceType* _M_starts;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
thread_index_t num_threads = omp_get_num_threads();
_ThreadIndex __num_threads = omp_get_num_threads();
# pragma omp single
{
pus = new DRSSorterPU<RandomAccessIterator, random_number>
[num_threads];
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>
[__num_threads];
sd.temporaries = new value_type*[num_threads];
sd.dist = new difference_type*[num_bins + 1];
sd.bin_proc = new thread_index_t[num_bins];
for (bin_index b = 0; b < num_bins + 1; ++b)
sd.dist[b] = new difference_type[num_threads + 1];
for (bin_index b = 0; b < (num_bins + 1); ++b)
_M_sd._M_temporaries = new _ValueType*[__num_threads];
_M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1];
_M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins];
for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b)
_M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b)
{
sd.dist[0][0] = 0;
sd.dist[b][0] = 0;
_M_sd._M_dist[0][0] = 0;
_M_sd._M_dist[__b][0] = 0;
}
starts = sd.starts = new difference_type[num_threads + 1];
_M_starts = _M_sd._M_starts = new _DifferenceType[__num_threads + 1];
int bin_cursor = 0;
sd.num_bins = num_bins;
sd.num_bits = __log2(num_bins);
_M_sd._M_num_bins = _M_num_bins;
_M_sd._M_num_bits = __log2(_M_num_bins);
difference_type chunk_length = n / num_threads,
split = n % num_threads, start = 0;
difference_type bin_chunk_length = num_bins / num_threads,
bin_split = num_bins % num_threads;
for (thread_index_t i = 0; i < num_threads; ++i)
_DifferenceType __chunk_length = __n / __num_threads,
__split = __n % __num_threads, __start = 0;
_DifferenceType bin_chunk_length = _M_num_bins / __num_threads,
bin_split = _M_num_bins % __num_threads;
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
starts[i] = start;
start += (i < split) ? (chunk_length + 1) : chunk_length;
int j = pus[i].bins_begin = bin_cursor;
_M_starts[__i] = __start;
__start += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
int __j = __pus[__i]._M_bins_begin = bin_cursor;
// Range of bins for this processor.
bin_cursor += (i < bin_split) ?
bin_cursor += (__i < bin_split) ?
(bin_chunk_length + 1) : bin_chunk_length;
pus[i].bins_end = bin_cursor;
for (; j < bin_cursor; ++j)
sd.bin_proc[j] = i;
pus[i].num_threads = num_threads;
pus[i].seed = rng(std::numeric_limits<uint32>::max());
pus[i].sd = &sd;
__pus[__i].__bins_end = bin_cursor;
for (; __j < bin_cursor; ++__j)
_M_sd._M_bin_proc[__j] = __i;
__pus[__i].__num_threads = __num_threads;
__pus[__i]._M_seed = __rng(std::numeric_limits<uint32>::max());
__pus[__i]._M_sd = &_M_sd;
}
starts[num_threads] = start;
_M_starts[__num_threads] = __start;
} //single
// Now shuffle in parallel.
parallel_random_shuffle_drs_pu(pus);
__parallel_random_shuffle_drs_pu(__pus);
} // parallel
delete[] starts;
delete[] sd.bin_proc;
for (int s = 0; s < (num_bins + 1); ++s)
delete[] sd.dist[s];
delete[] sd.dist;
delete[] sd.temporaries;
delete[] _M_starts;
delete[] _M_sd._M_bin_proc;
for (int __s = 0; __s < (_M_num_bins + 1); ++__s)
delete[] _M_sd._M_dist[__s];
delete[] _M_sd._M_dist;
delete[] _M_sd._M_temporaries;
delete[] pus;
delete[] __pus;
}
/** @brief Sequential cache-efficient random shuffle.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param rng Random number generator to use.
/** @brief Sequential __cache-efficient random shuffle.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __rng Random number generator to use.
*/
template<typename RandomAccessIterator, typename RandomNumberGenerator>
template<typename _RAIter, typename RandomNumberGenerator>
void
sequential_random_shuffle(RandomAccessIterator begin,
RandomAccessIterator end,
RandomNumberGenerator& rng)
__sequential_random_shuffle(_RAIter __begin,
_RAIter __end,
RandomNumberGenerator& __rng)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
const _Settings& __s = _Settings::get();
bin_index num_bins, num_bins_cache;
_BinIndex _M_num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first, must fit into L1.
num_bins_cache =
std::max<difference_type>
(1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache);
__num_bins_cache =
std::max<_DifferenceType>
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size
num_bins = std::min(n, (difference_type)num_bins_cache);
_M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin
num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
_M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins);
#endif
num_bins = round_up_to_pow2(num_bins);
_M_num_bins = __round_up_to_pow2(_M_num_bins);
if (num_bins < num_bins_cache)
if (_M_num_bins < __num_bins_cache)
{
#endif
// Now try the L2 cache, must fit into L2.
num_bins_cache =
static_cast<bin_index>(std::max<difference_type>(
1, n / (__s.L2_cache_size / sizeof(value_type))));
num_bins_cache = round_up_to_pow2(num_bins_cache);
__num_bins_cache =
static_cast<_BinIndex>(std::max<_DifferenceType>(
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size.
num_bins = static_cast<bin_index>
(std::min(n, static_cast<difference_type>(num_bins_cache)));
_M_num_bins = static_cast<_BinIndex>
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin
num_bins =
std::min<difference_type>(__s.TLB_size / 2, num_bins);
_M_num_bins =
std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
#endif
num_bins = round_up_to_pow2(num_bins);
_M_num_bins = __round_up_to_pow2(_M_num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
int num_bits = __log2(num_bins);
int _M_num_bits = __log2(_M_num_bins);
if (num_bins > 1)
if (_M_num_bins > 1)
{
value_type* target = static_cast<value_type*>(
::operator new(sizeof(value_type) * n));
bin_index* oracles = new bin_index[n];
difference_type* dist0 = new difference_type[num_bins + 1],
* dist1 = new difference_type[num_bins + 1];
_ValueType* __target = static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * __n));
_BinIndex* __oracles = new _BinIndex[__n];
_DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1],
* __dist1 = new _DifferenceType[_M_num_bins + 1];
for (int b = 0; b < num_bins + 1; ++b)
dist0[b] = 0;
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
__dist0[__b] = 0;
random_number bitrng(rng(0xFFFFFFFF));
_RandomNumber bitrng(__rng(0xFFFFFFFF));
for (difference_type i = 0; i < n; ++i)
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
bin_index oracle = random_number_pow2(num_bits, bitrng);
oracles[i] = oracle;
_BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
++(dist0[oracle + 1]);
++(__dist0[__oracle + 1]);
}
// Sum up bins.
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
__gnu_sequential::partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
for (int b = 0; b < num_bins + 1; ++b)
dist1[b] = dist0[b];
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b];
// Distribute according to oracles.
for (difference_type i = 0; i < n; ++i)
::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
for (_DifferenceType __i = 0; __i < __n; ++__i)
::new(&(__target[(__dist0[__oracles[__i]])++])) _ValueType(*(__begin + __i));
for (int b = 0; b < num_bins; ++b)
for (int __b = 0; __b < _M_num_bins; ++__b)
{
sequential_random_shuffle(target + dist1[b],
target + dist1[b + 1],
rng);
__sequential_random_shuffle(__target + __dist1[__b],
__target + __dist1[__b + 1],
__rng);
}
// Copy elements back.
std::copy(target, target + n, begin);
std::copy(__target, __target + __n, __begin);
delete[] dist0;
delete[] dist1;
delete[] oracles;
::operator delete(target);
delete[] __dist0;
delete[] __dist1;
delete[] __oracles;
::operator delete(__target);
}
else
__gnu_sequential::random_shuffle(begin, end, rng);
__gnu_sequential::random_shuffle(__begin, __end, __rng);
}
/** @brief Parallel random public call.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param rng Random number generator to use.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __rng Random number generator to use.
*/
template<typename RandomAccessIterator, typename RandomNumberGenerator>
template<typename _RAIter, typename RandomNumberGenerator>
inline void
parallel_random_shuffle(RandomAccessIterator begin,
RandomAccessIterator end,
RandomNumberGenerator rng = random_number())
__parallel_random_shuffle(_RAIter __begin,
_RAIter __end,
RandomNumberGenerator __rng = _RandomNumber())
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
__parallel_random_shuffle_drs(__begin, __end, __n, __get_max_threads(), __rng) ;
}
}

View File

@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/search.h
* @brief Parallel implementation base for std::search() and
* std::search_n().
* This file is a GNU parallel extension to the Standard C++ Library.
*/
@ -42,130 +42,130 @@
namespace __gnu_parallel
{
/**
 *  @brief Precalculate advances for the Knuth-Morris-Pratt algorithm.
 *  @param __elements Begin iterator of the sequence to search for.
 *  @param __length Length of the sequence to search for.
 *  @param __off Returned offsets (KMP border table); must have room for
 *  __length + 1 entries. __off[__j] is the length of the longest proper
 *  border of the prefix of length __j (with __off[0] == -1).
 */
template<typename _RAIter, typename _DifferenceTp>
  void
  __calc_borders(_RAIter __elements, _DifferenceTp __length,
                 _DifferenceTp* __off)
  {
    typedef _DifferenceTp _DifferenceType;

    __off[0] = -1;
    if (__length > 1)
      __off[1] = 0;
    _DifferenceType __k = 0;
    for (_DifferenceType __j = 2; __j <= __length; __j++)
      {
        // Fall back along shorter borders until the next character
        // extends a border of the prefix ending at __j - 1.
        while ((__k >= 0) && !(__elements[__k] == __elements[__j - 1]))
          __k = __off[__k];
        __off[__j] = ++__k;
      }
  }
// Generic parallel find algorithm (requires random access iterator).
/** @brief Parallel std::search.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param pred Find predicate.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __pred Find predicate.
* @return Place of finding in first sequences. */
template<typename _RandomAccessIterator1,
typename _RandomAccessIterator2,
typename Pred>
_RandomAccessIterator1
search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
_RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
Pred pred)
template<typename __RAIter1,
typename __RAIter2,
typename _Pred>
__RAIter1
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
__RAIter2 __begin2, __RAIter2 __end2,
_Pred __pred)
{
typedef std::iterator_traits<_RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<__RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2));
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
difference_type pattern_length = end2 - begin2;
_DifferenceType __pattern_length = __end2 - __begin2;
// Pattern too short.
if(pattern_length <= 0)
return end1;
if(__pattern_length <= 0)
return __end1;
// Last point to start search.
difference_type input_length = (end1 - begin1) - pattern_length;
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
// Where is first occurrence of pattern? defaults to end.
difference_type result = (end1 - begin1);
difference_type *splitters;
_DifferenceType __result = (__end1 - __begin1);
_DifferenceType *__splitters;
// Pattern too long.
if (input_length < 0)
return end1;
if (__input_length < 0)
return __end1;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
thread_index_t num_threads =
std::max<difference_type>(1,
std::min<difference_type>(input_length, get_max_threads()));
_ThreadIndex __num_threads =
std::max<_DifferenceType>(1,
std::min<_DifferenceType>(__input_length, __get_max_threads()));
difference_type advances[pattern_length];
calc_borders(begin2, pattern_length, advances);
_DifferenceType __advances[__pattern_length];
__calc_borders(__begin2, __pattern_length, __advances);
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
splitters = new difference_type[num_threads + 1];
equally_split(input_length, num_threads, splitters);
__num_threads = omp_get_num_threads();
__splitters = new _DifferenceType[__num_threads + 1];
equally_split(__input_length, __num_threads, __splitters);
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
difference_type start = splitters[iam], stop = splitters[iam + 1];
_DifferenceType __start = __splitters[__iam], __stop = __splitters[__iam + 1];
difference_type pos_in_pattern = 0;
bool found_pattern = false;
_DifferenceType __pos_in_pattern = 0;
bool __found_pattern = false;
while (start <= stop && !found_pattern)
while (__start <= __stop && !__found_pattern)
{
// Get new value of result.
#pragma omp flush(result)
#pragma omp flush(__result)
// No chance for this thread to find first occurrence.
if (result < start)
if (__result < __start)
break;
while (pred(begin1[start + pos_in_pattern],
begin2[pos_in_pattern]))
while (__pred(__begin1[__start + __pos_in_pattern],
__begin2[__pos_in_pattern]))
{
++pos_in_pattern;
if (pos_in_pattern == pattern_length)
++__pos_in_pattern;
if (__pos_in_pattern == __pattern_length)
{
// Found new candidate for result.
omp_set_lock(&result_lock);
result = std::min(result, start);
omp_unset_lock(&result_lock);
omp_set_lock(&__result_lock);
__result = std::min(__result, __start);
omp_unset_lock(&__result_lock);
found_pattern = true;
__found_pattern = true;
break;
}
}
// Make safe jump.
start += (pos_in_pattern - advances[pos_in_pattern]);
pos_in_pattern =
(advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern =
(__advances[__pos_in_pattern] < 0) ? 0 : __advances[__pos_in_pattern];
}
} //parallel
omp_destroy_lock(&result_lock);
omp_destroy_lock(&__result_lock);
delete[] splitters;
delete[] __splitters;
// Return iterator on found element.
return (begin1 + result);
return (__begin1 + __result);
}
} // end namespace

View File

@ -41,482 +41,482 @@
namespace __gnu_parallel
{
/** @brief Copy the remainder of a bipartite input range.
 *
 *  If the first part [__b.first, __e.first) is non-empty, copies it;
 *  otherwise copies the second part [__b.second, __e.second).
 *  @param __b Pair of current positions in the two parts.
 *  @param __e Pair of end positions of the two parts.
 *  @param __r Output iterator.
 *  @return Output iterator past the last element written.
 */
template<typename _IIter, typename _OutputIterator>
  _OutputIterator
  copy_tail(std::pair<_IIter, _IIter> __b,
            std::pair<_IIter, _IIter> __e, _OutputIterator __r)
  {
    if (__b.first != __e.first)
      {
        do
          {
            *__r++ = *__b.first++;
          }
        while (__b.first != __e.first);
      }
    else
      {
        while (__b.second != __e.second)
          *__r++ = *__b.second++;
      }
    return __r;
  }
/** @brief Functor computing the symmetric difference of two sorted
 *  sequences; provides the merge, count and one-sequence-empty steps
 *  used by __parallel_set_operation. */
template<typename _IIter, typename _OutputIterator, typename _Compare>
  struct symmetric_difference_func
  {
    typedef std::iterator_traits<_IIter> _TraitsType;
    typedef typename _TraitsType::difference_type _DifferenceType;
    typedef typename std::pair<_IIter, _IIter> _IteratorPair;

    symmetric_difference_func(_Compare __c) : __comp(__c) {}

    _Compare __comp;

    // Write the elements occurring in exactly one of [__a, __b) and
    // [__c, __d) to __r; returns past-the-end of the output.
    _OutputIterator
    _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
              _OutputIterator __r) const
    {
      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            {
              *__r = *__a;
              ++__a;
              ++__r;
            }
          else if (__comp(*__c, *__a))
            {
              *__r = *__c;
              ++__c;
              ++__r;
            }
          else
            {
              // Equivalent elements: in both sequences, so in neither
              // output.
              ++__a;
              ++__c;
            }
        }
      return std::copy(__c, __d, std::copy(__a, __b, __r));
    }

    // Count, without writing, how many elements _M_invoke would produce.
    _DifferenceType
    __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
    {
      _DifferenceType __counter = 0;

      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            {
              ++__a;
              ++__counter;
            }
          else if (__comp(*__c, *__a))
            {
              ++__c;
              ++__counter;
            }
          else
            {
              ++__a;
              ++__c;
            }
        }
      return __counter + (__b - __a) + (__d - __c);
    }

    // First sequence empty: result is the second sequence.
    _OutputIterator
    __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
    { return std::copy(__c, __d, __out); }

    // Second sequence empty: result is the first sequence.
    _OutputIterator
    __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
    { return std::copy(__a, __b, __out); }
  };
/** @brief Functor computing the difference of two sorted sequences
 *  (elements of the first not contained in the second); provides the
 *  merge, count and one-sequence-empty steps used by
 *  __parallel_set_operation. */
template<typename _IIter, typename _OutputIterator, typename _Compare>
  struct __difference_func
  {
    typedef std::iterator_traits<_IIter> _TraitsType;
    typedef typename _TraitsType::difference_type _DifferenceType;
    typedef typename std::pair<_IIter, _IIter> _IteratorPair;

    __difference_func(_Compare __c) : __comp(__c) {}

    _Compare __comp;

    // Write the elements of [__a, __b) not contained in [__c, __d) to
    // __r; returns past-the-end of the output.
    _OutputIterator
    _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
              _OutputIterator __r) const
    {
      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            {
              *__r = *__a;
              ++__a;
              ++__r;
            }
          else if (__comp(*__c, *__a))
            { ++__c; }
          else
            {
              // Equivalent elements: drop from the output.
              ++__a;
              ++__c;
            }
        }
      return std::copy(__a, __b, __r);
    }

    // Count, without writing, how many elements _M_invoke would produce.
    _DifferenceType
    __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
    {
      _DifferenceType __counter = 0;

      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            {
              ++__a;
              ++__counter;
            }
          else if (__comp(*__c, *__a))
            { ++__c; }
          else
            { ++__a; ++__c; }
        }
      return __counter + (__b - __a);
    }

    // First sequence empty: the difference is empty.
    inline _OutputIterator
    __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
    { return __out; }

    // Second sequence empty: the difference is the whole first sequence.
    inline _OutputIterator
    __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
    { return std::copy(__a, __b, __out); }
  };
/** @brief Functor computing the intersection of two sorted sequences;
 *  provides the merge, count and one-sequence-empty steps used by
 *  __parallel_set_operation. */
template<typename _IIter, typename _OutputIterator, typename _Compare>
  struct __intersection_func
  {
    typedef std::iterator_traits<_IIter> _TraitsType;
    typedef typename _TraitsType::difference_type _DifferenceType;
    typedef typename std::pair<_IIter, _IIter> _IteratorPair;

    __intersection_func(_Compare __c) : __comp(__c) {}

    _Compare __comp;

    // Write the elements contained in both [__a, __b) and [__c, __d)
    // to __r; returns past-the-end of the output.
    _OutputIterator
    _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
              _OutputIterator __r) const
    {
      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            { ++__a; }
          else if (__comp(*__c, *__a))
            { ++__c; }
          else
            {
              // Equivalent elements: keep once.
              *__r = *__a;
              ++__a;
              ++__c;
              ++__r;
            }
        }
      return __r;
    }

    // Count, without writing, how many elements _M_invoke would produce.
    _DifferenceType
    __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
    {
      _DifferenceType __counter = 0;

      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            { ++__a; }
          else if (__comp(*__c, *__a))
            { ++__c; }
          else
            {
              ++__a;
              ++__c;
              ++__counter;
            }
        }
      return __counter;
    }

    // First sequence empty: the intersection is empty.
    inline _OutputIterator
    __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
    { return __out; }

    // Second sequence empty: the intersection is empty.
    inline _OutputIterator
    __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
    { return __out; }
  };
/** @brief Functor computing the union of two sorted sequences; provides
 *  the merge, count and one-sequence-empty steps used by
 *  __parallel_set_operation. */
template<class _IIter, class _OutputIterator, class _Compare>
  struct __union_func
  {
    typedef typename std::iterator_traits<_IIter>::difference_type
      _DifferenceType;

    __union_func(_Compare __c) : __comp(__c) {}

    _Compare __comp;

    // Merge [__a, __b) and [__c, __d) into __r, writing elements that
    // appear in both sequences only once; returns past-the-end of the
    // output.
    _OutputIterator
    _M_invoke(_IIter __a, const _IIter __b, _IIter __c,
              const _IIter __d, _OutputIterator __r) const
    {
      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            {
              *__r = *__a;
              ++__a;
            }
          else if (__comp(*__c, *__a))
            {
              *__r = *__c;
              ++__c;
            }
          else
            {
              *__r = *__a;
              ++__a;
              ++__c;
            }
          ++__r;
        }
      return std::copy(__c, __d, std::copy(__a, __b, __r));
    }

    // Count, without writing, how many elements _M_invoke would produce.
    _DifferenceType
    __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
    {
      _DifferenceType __counter = 0;

      while (__a != __b && __c != __d)
        {
          if (__comp(*__a, *__c))
            { ++__a; }
          else if (__comp(*__c, *__a))
            { ++__c; }
          else
            {
              ++__a;
              ++__c;
            }
          ++__counter;
        }
      __counter += (__b - __a);
      __counter += (__d - __c);
      return __counter;
    }

    // First sequence empty: the union is the second sequence.
    inline _OutputIterator
    __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
    { return std::copy(__c, __d, __out); }

    // Second sequence empty: the union is the first sequence.
    inline _OutputIterator
    __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
    { return std::copy(__a, __b, __out); }
  };
template<typename InputIterator,
typename OutputIterator,
template<typename _IIter,
typename _OutputIterator,
typename Operation>
OutputIterator
parallel_set_operation(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2,
OutputIterator result, Operation op)
_OutputIterator
__parallel_set_operation(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, Operation __op)
{
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2))
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
if (begin1 == end1)
return op.first_empty(begin2, end2, result);
if (__begin1 == __end1)
return __op.__first_empty(__begin2, __end2, __result);
if (begin2 == end2)
return op.second_empty(begin1, end1, result);
if (__begin2 == __end2)
return __op.__second_empty(__begin1, __end1, __result);
const difference_type size = (end1 - begin1) + (end2 - begin2);
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
const iterator_pair sequence[ 2 ] =
{ std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ;
OutputIterator return_value = result;
difference_type *borders;
iterator_pair *block_begins;
difference_type* lengths;
const _IteratorPair __sequence[ 2 ] =
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) } ;
_OutputIterator return_value = __result;
_DifferenceType *__borders;
_IteratorPair *__block_begins;
_DifferenceType* __lengths;
thread_index_t num_threads =
std::min<difference_type>(get_max_threads(),
std::min(end1 - begin1, end2 - begin2));
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(),
std::min(__end1 - __begin1, __end2 - __begin2));
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
__num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2];
equally_split(size, num_threads + 1, borders);
block_begins = new iterator_pair[num_threads + 1];
// Very start.
block_begins[0] = std::make_pair(begin1, begin2);
lengths = new difference_type[num_threads];
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
__block_begins = new _IteratorPair[__num_threads + 1];
// Very __start.
__block_begins[0] = std::make_pair(__begin1, __begin2);
__lengths = new _DifferenceType[__num_threads];
} //single
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Result from multiseq_partition.
InputIterator offset[2];
const difference_type rank = borders[iam + 1];
// _Result from multiseq_partition.
_IIter __offset[2];
const _DifferenceType __rank = __borders[__iam + 1];
multiseq_partition(sequence, sequence + 2, rank, offset, op.comp);
multiseq_partition(__sequence, __sequence + 2, __rank, __offset, __op.__comp);
// allowed to read?
// together
// *(offset[ 0 ] - 1) == *offset[ 1 ]
if (offset[ 0 ] != begin1 && offset[ 1 ] != end2
&& !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ])
&& !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1)))
// *(__offset[ 0 ] - 1) == *__offset[ 1 ]
if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2
&& !__op.__comp(*(__offset[ 0 ] - 1), *__offset[ 1 ])
&& !__op.__comp(*__offset[ 1 ], *(__offset[ 0 ] - 1)))
{
// Avoid split between globally equal elements: move one to
// front in first sequence.
--offset[ 0 ];
--__offset[ 0 ];
}
iterator_pair block_end = block_begins[ iam + 1 ] =
iterator_pair(offset[ 0 ], offset[ 1 ]);
_IteratorPair block_end = __block_begins[ __iam + 1 ] =
_IteratorPair(__offset[ 0 ], __offset[ 1 ]);
// Make sure all threads have their block_begin result written out.
# pragma omp barrier
iterator_pair block_begin = block_begins[ iam ];
_IteratorPair __block_begin = __block_begins[ __iam ];
// Begin working for the first block, while the others except
// the last start to count.
if (iam == 0)
if (__iam == 0)
{
// The first thread can copy already.
lengths[ iam ] = op.invoke(block_begin.first, block_end.first,
block_begin.second, block_end.second,
result)
- result;
__lengths[ __iam ] = __op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second,
__result)
- __result;
}
else
{
lengths[ iam ] = op.count(block_begin.first, block_end.first,
block_begin.second, block_end.second);
__lengths[ __iam ] = __op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
}
// Make sure everyone wrote their lengths.
# pragma omp barrier
OutputIterator r = result;
_OutputIterator __r = __result;
if (iam == 0)
if (__iam == 0)
{
// Do the last block.
for (int i = 0; i < num_threads; ++i)
r += lengths[i];
for (int __i = 0; __i < __num_threads; ++__i)
__r += __lengths[__i];
block_begin = block_begins[num_threads];
__block_begin = __block_begins[__num_threads];
// Return the result iterator of the last block.
return_value = op.invoke(
block_begin.first, end1, block_begin.second, end2, r);
return_value = __op._M_invoke(
__block_begin.first, __end1, __block_begin.second, __end2, __r);
}
else
{
for (int i = 0; i < iam; ++i)
r += lengths[ i ];
for (int __i = 0; __i < __iam; ++__i)
__r += __lengths[ __i ];
// Reset begins for copy pass.
op.invoke(block_begin.first, block_end.first,
block_begin.second, block_end.second, r);
__op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second, __r);
}
}
return return_value;
}
template<typename InputIterator,
typename OutputIterator,
typename Comparator>
inline OutputIterator
parallel_set_union(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp)
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
union_func< InputIterator, OutputIterator, Comparator>(comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__union_func< _IIter, _OutputIterator, _Compare>(__comp));
}
template<typename InputIterator,
typename OutputIterator,
typename Comparator>
inline OutputIterator
parallel_set_intersection(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp)
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
intersection_func<InputIterator, OutputIterator, Comparator>(comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(__comp));
}
template<typename InputIterator,
typename OutputIterator,
typename Comparator>
inline OutputIterator
parallel_set_difference(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp)
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
difference_func<InputIterator, OutputIterator, Comparator>(comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(__comp));
}
template<typename InputIterator,
typename OutputIterator,
typename Comparator>
inline OutputIterator
parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp)
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
symmetric_difference_func<InputIterator, OutputIterator, Comparator>
(comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(__comp));
}
}

View File

@ -30,7 +30,7 @@
* @section parallelization_decision
* The decision whether to run an algorithm in parallel.
*
* There are several ways the user can switch on and off the parallel
* execution of an algorithm, both at compile- and run-time.
*
* Only sequential execution can be forced at compile-time. This
@ -46,11 +46,11 @@
*
* To force sequential execution of an algorithm ultimately at
* compile-time, the user must add the tag
* __gnu_parallel::sequential_tag() to the end of the parameter list,
* e. g.
*
* \code
* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
* \endcode
*
* This is compatible with all overloaded algorithm variants. No
@ -60,18 +60,18 @@
* If the algorithm call is not forced to be executed sequentially
* at compile-time, the decision is made at run-time.
* The global variable __gnu_parallel::_Settings::algorithm_strategy
* is checked. It is a tristate variable corresponding to:
* is checked. _It is a tristate variable corresponding to:
*
* a. force_sequential, meaning the sequential algorithm is executed.
* b. force_parallel, meaning the parallel algorithm is executed.
* c. heuristic
* b. force_parallel, meaning the parallel algorithm is executed.
* c. heuristic
*
* For heuristic, the parallel algorithm implementation is called
* only if the input size is sufficiently large. For most
* algorithms, the input size is the (combined) length of the input
* sequence(s). The threshold can be set by the user, individually
* sequence(__s). The threshold can be set by the user, individually
* for each algorithm. The according variables are called
* __gnu_parallel::_Settings::[algorithm]_minimal_n .
* gnu_parallel::_Settings::[algorithm]_minimal_n .
*
* For some of the algorithms, there are even more tuning options,
* e. g. the ability to choose from multiple algorithm variants. See
@ -88,24 +88,24 @@
/**
* @brief Determine at compile(?)-time if the parallel variant of an
* algorithm should be called.
* @param c A condition that is convertible to bool that is overruled by
* @param __c A condition that is convertible to bool that is overruled by
* __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
* based on the input size.
*/
#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
#define _GLIBCXX_PARALLEL_CONDITION(__c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
/*
inline bool
parallel_condition(bool c)
parallel_condition(bool __c)
{
bool ret = false;
const _Settings& s = _Settings::get();
if (s.algorithm_strategy != force_seqential)
const _Settings& __s = _Settings::get();
if (__s.algorithm_strategy != force_seqential)
{
if (s.algorithm_strategy == force_parallel)
if (__s.algorithm_strategy == force_parallel)
ret = true;
else
ret = get_max_threads() > 1 && c;
ret = __get_max_threads() > 1 && __c;
}
return ret;
}
@ -131,49 +131,49 @@ namespace __gnu_parallel
// Per-algorithm settings.
/// Minimal input size for accumulate.
sequence_index_t accumulate_minimal_n;
_SequenceIndex accumulate_minimal_n;
/// Minimal input size for adjacent_difference.
unsigned int adjacent_difference_minimal_n;
/// Minimal input size for count and count_if.
sequence_index_t count_minimal_n;
_SequenceIndex count_minimal_n;
/// Minimal input size for fill.
sequence_index_t fill_minimal_n;
_SequenceIndex fill_minimal_n;
/// Block size increase factor for find.
double find_increasing_factor;
/// Initial block size for find.
sequence_index_t find_initial_block_size;
_SequenceIndex find_initial_block_size;
/// Maximal block size for find.
sequence_index_t find_maximum_block_size;
_SequenceIndex find_maximum_block_size;
/// Start with looking for this many elements sequentially, for find.
sequence_index_t find_sequential_search_size;
_SequenceIndex find_sequential_search_size;
/// Minimal input size for for_each.
sequence_index_t for_each_minimal_n;
_SequenceIndex for_each_minimal_n;
/// Minimal input size for generate.
sequence_index_t generate_minimal_n;
_SequenceIndex generate_minimal_n;
/// Minimal input size for max_element.
sequence_index_t max_element_minimal_n;
_SequenceIndex max_element_minimal_n;
/// Minimal input size for merge.
sequence_index_t merge_minimal_n;
_SequenceIndex merge_minimal_n;
/// Oversampling factor for merge.
unsigned int merge_oversampling;
/// Minimal input size for min_element.
sequence_index_t min_element_minimal_n;
_SequenceIndex min_element_minimal_n;
/// Minimal input size for multiway_merge.
sequence_index_t multiway_merge_minimal_n;
_SequenceIndex multiway_merge_minimal_n;
/// Oversampling factor for multiway_merge.
int multiway_merge_minimal_k;
@ -182,22 +182,22 @@ namespace __gnu_parallel
unsigned int multiway_merge_oversampling;
/// Minimal input size for nth_element.
sequence_index_t nth_element_minimal_n;
_SequenceIndex nth_element_minimal_n;
/// Chunk size for partition.
sequence_index_t partition_chunk_size;
_SequenceIndex partition_chunk_size;
/// Chunk size for partition, relative to input size. If > 0.0,
/// this value overrides partition_chunk_size.
double partition_chunk_share;
/// Minimal input size for partition.
sequence_index_t partition_minimal_n;
_SequenceIndex partition_minimal_n;
/// Minimal input size for partial_sort.
sequence_index_t partial_sort_minimal_n;
_SequenceIndex partial_sort_minimal_n;
/// Ratio for partial_sum. Assume "sum and write result" to be
/// Ratio for partial_sum. Assume "sum and write __result" to be
/// this factor slower than just "sum".
float partial_sum_dilation;
@ -208,22 +208,22 @@ namespace __gnu_parallel
unsigned int random_shuffle_minimal_n;
/// Minimal input size for replace and replace_if.
sequence_index_t replace_minimal_n;
_SequenceIndex replace_minimal_n;
/// Minimal input size for set_difference.
sequence_index_t set_difference_minimal_n;
_SequenceIndex set_difference_minimal_n;
/// Minimal input size for set_intersection.
sequence_index_t set_intersection_minimal_n;
_SequenceIndex set_intersection_minimal_n;
/// Minimal input size for set_symmetric_difference.
sequence_index_t set_symmetric_difference_minimal_n;
_SequenceIndex set_symmetric_difference_minimal_n;
/// Minimal input size for set_union.
sequence_index_t set_union_minimal_n;
_SequenceIndex set_union_minimal_n;
/// Minimal input size for parallel sorting.
sequence_index_t sort_minimal_n;
_SequenceIndex sort_minimal_n;
/// Oversampling factor for parallel std::sort (MWMS).
unsigned int sort_mwms_oversampling;
@ -231,38 +231,38 @@ namespace __gnu_parallel
/// Such many samples to take to find a good pivot (quicksort).
unsigned int sort_qs_num_samples_preset;
/// Maximal subsequence length to switch to unbalanced base case.
/// Maximal subsequence __length to switch to unbalanced __base case.
/// Applies to std::sort with dynamically load-balanced quicksort.
sequence_index_t sort_qsb_base_case_maximal_n;
_SequenceIndex sort_qsb_base_case_maximal_n;
/// Minimal input size for parallel std::transform.
sequence_index_t transform_minimal_n;
_SequenceIndex transform_minimal_n;
/// Minimal input size for unique_copy.
sequence_index_t unique_copy_minimal_n;
_SequenceIndex unique_copy_minimal_n;
sequence_index_t workstealing_chunk_size;
_SequenceIndex workstealing_chunk_size;
// Hardware dependent tuning parameters.
/// Size of the L1 cache in bytes (underestimation).
/// size of the L1 cache in bytes (underestimation).
unsigned long long L1_cache_size;
/// Size of the L2 cache in bytes (underestimation).
/// size of the L2 cache in bytes (underestimation).
unsigned long long L2_cache_size;
/// Size of the Translation Lookaside Buffer (underestimation).
/// size of the Translation Lookaside Buffer (underestimation).
unsigned int TLB_size;
/// Overestimation of cache line size. Used to avoid false
/// sharing, i. e. elements of different threads are at least this
/// sharing, i.e. elements of different threads are at least this
/// amount apart.
unsigned int cache_line_size;
// Statistics.
/// The number of stolen ranges in load-balanced quicksort.
sequence_index_t qsb_steals;
_SequenceIndex qsb_steals;
/// Get the global settings.
_GLIBCXX_CONST static const _Settings&

View File

@ -55,174 +55,174 @@
namespace __gnu_parallel
{
//prototype
template<bool stable, typename RandomAccessIterator,
typename Comparator, typename Parallelism>
template<bool __stable, typename _RAIter,
typename _Compare, typename _Parallelism>
void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, Parallelism parallelism);
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, _Parallelism __parallelism);
/**
* @brief Choose multiway mergesort, splitting variant at run-time,
* for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, multiway_mergesort_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<stable, true>
(begin, end, comp, parallelism.get_num_threads());
parallel_sort_mwms<__stable, true>
(__begin, __end, __comp, __parallelism.__get_num_threads());
else
parallel_sort_mwms<stable, false>
(begin, end, comp, parallelism.get_num_threads());
parallel_sort_mwms<__stable, false>
(__begin, __end, __comp, __parallelism.__get_num_threads());
}
/**
* @brief Choose multiway mergesort with exact splitting,
* @brief Choose multiway mergesort with __exact splitting,
* for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, multiway_mergesort_exact_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_exact_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
parallel_sort_mwms<stable, true>
(begin, end, comp, parallelism.get_num_threads());
parallel_sort_mwms<__stable, true>
(__begin, __end, __comp, __parallelism.__get_num_threads());
}
/**
* @brief Choose multiway mergesort with splitting by sampling,
* for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, multiway_mergesort_sampling_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_sampling_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
parallel_sort_mwms<stable, false>
(begin, end, comp, parallelism.get_num_threads());
parallel_sort_mwms<__stable, false>
(__begin, __end, __comp, __parallelism.__get_num_threads());
}
/**
* @brief Choose quicksort for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, quicksort_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, quicksort_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
_GLIBCXX_PARALLEL_ASSERT(stable == false);
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
parallel_sort_qs(begin, end, comp, parallelism.get_num_threads());
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
}
/**
* @brief Choose balanced quicksort for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param stable Sort stable.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @param __stable Sort __stable.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, balanced_quicksort_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, balanced_quicksort_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
_GLIBCXX_PARALLEL_ASSERT(stable == false);
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads());
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
}
/**
* @brief Choose multiway mergesort with exact splitting,
* @brief Choose multiway mergesort with __exact splitting,
* for parallel sorting.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, default_parallel_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, default_parallel_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
parallel_sort<stable>
(begin, end, comp,
multiway_mergesort_exact_tag(parallelism.get_num_threads()));
parallel_sort<__stable>
(__begin, __end, __comp,
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
}
/**
* @brief Choose a parallel sorting algorithm.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param comp Comparator.
* @param stable Sort stable.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
* @param __stable Sort __stable.
* @callgraph
*/
template<bool stable, typename RandomAccessIterator, typename Comparator>
template<bool __stable, typename _RAIter, typename _Compare>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, parallel_tag parallelism)
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, parallel_tag __parallelism)
{
_GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (false) ;
#if _GLIBCXX_MERGESORT
else if (stable || _Settings::get().sort_algorithm == MWMS)
else if (__stable || _Settings::get().sort_algorithm == MWMS)
{
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<stable, true>
(begin, end, comp, parallelism.get_num_threads());
parallel_sort_mwms<__stable, true>
(__begin, __end, __comp, __parallelism.__get_num_threads());
else
parallel_sort_mwms<false, false>
(begin, end, comp, parallelism.get_num_threads());
(__begin, __end, __comp, __parallelism.__get_num_threads());
}
#endif
#if _GLIBCXX_QUICKSORT
else if (_Settings::get().sort_algorithm == QS)
parallel_sort_qs(begin, end, comp, parallelism.get_num_threads());
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
#endif
#if _GLIBCXX_BAL_QUICKSORT
else if (_Settings::get().sort_algorithm == QS_BALANCED)
parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads());
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
#endif
else
__gnu_sequential::sort(begin, end, comp);
__gnu_sequential::sort(__begin, __end, __comp);
}
} // end namespace __gnu_parallel

View File

@ -46,37 +46,37 @@ namespace __gnu_parallel
struct parallel_tag
{
private:
thread_index_t num_threads;
_ThreadIndex __num_threads;
public:
/** @brief Default constructor. Use default number of threads. */
parallel_tag()
{
this->num_threads = 0;
this->__num_threads = 0;
}
/** @brief Default constructor. Recommend number of threads to use.
* @param num_threads Desired number of threads. */
parallel_tag(thread_index_t num_threads)
* @param __num_threads Desired number of threads. */
parallel_tag(_ThreadIndex __num_threads)
{
this->num_threads = num_threads;
this->__num_threads = __num_threads;
}
/** @brief Find out desired number of threads.
/** @brief Find __out desired number of threads.
* @return Desired number of threads. */
inline thread_index_t get_num_threads()
inline _ThreadIndex __get_num_threads()
{
if(num_threads == 0)
if(__num_threads == 0)
return omp_get_max_threads();
else
return num_threads;
return __num_threads;
}
/** @brief Set the desired number of threads.
* @param num_threads Desired number of threads. */
inline void set_num_threads(thread_index_t num_threads)
* @param __num_threads Desired number of threads. */
inline void set_num_threads(_ThreadIndex __num_threads)
{
this->num_threads = num_threads;
this->__num_threads = __num_threads;
}
};
@ -85,8 +85,8 @@ namespace __gnu_parallel
struct default_parallel_tag : public parallel_tag
{
default_parallel_tag() { }
default_parallel_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
default_parallel_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Recommends parallel execution using dynamic
@ -111,21 +111,21 @@ namespace __gnu_parallel
/** @brief Forces parallel merging
* with exact splitting, at compile time. */
* with __exact splitting, at compile time. */
struct exact_tag : public parallel_tag
{
exact_tag() { }
exact_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel merging
* with exact splitting, at compile time. */
* with __exact splitting, at compile time. */
struct sampling_tag : public parallel_tag
{
sampling_tag() { }
sampling_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
@ -134,17 +134,17 @@ namespace __gnu_parallel
struct multiway_mergesort_tag : public parallel_tag
{
multiway_mergesort_tag() { }
multiway_mergesort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using multiway mergesort
* with exact splitting at compile time. */
* with __exact splitting at compile time. */
struct multiway_mergesort_exact_tag : public parallel_tag
{
multiway_mergesort_exact_tag() { }
multiway_mergesort_exact_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using multiway mergesort
@ -152,8 +152,8 @@ namespace __gnu_parallel
struct multiway_mergesort_sampling_tag : public parallel_tag
{
multiway_mergesort_sampling_tag() { }
multiway_mergesort_sampling_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using unbalanced quicksort
@ -161,8 +161,8 @@ namespace __gnu_parallel
struct quicksort_tag : public parallel_tag
{
quicksort_tag() { }
quicksort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using balanced quicksort
@ -170,8 +170,8 @@ namespace __gnu_parallel
struct balanced_quicksort_tag : public parallel_tag
{
balanced_quicksort_tag() { }
balanced_quicksort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
balanced_quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};

View File

@ -79,7 +79,7 @@ namespace __gnu_parallel
};
/// Merging algorithms:
// bubblesort-alike, loser-tree variants, enum sentinel.
// bubblesort-alike, loser-tree variants, enum __sentinel.
enum _MultiwayMergeAlgorithm
{
LOSER_TREE
@ -92,7 +92,7 @@ namespace __gnu_parallel
LINEAR
};
/// Sorting/merging algorithms: sampling, exact.
/// Sorting/merging algorithms: sampling, __exact.
enum _SplittingAlgorithm
{
SAMPLING,
@ -108,7 +108,7 @@ namespace __gnu_parallel
EQUAL_SPLIT
};
/// Integer Types.
/// _Integer Types.
// XXX need to use <cstdint>
/** @brief 16-bit signed integer. */
typedef short int16;
@ -129,27 +129,27 @@ namespace __gnu_parallel
typedef unsigned long long uint64;
/**
* @brief Unsigned integer to index elements.
* @brief Unsigned integer to index __elements.
* The total number of elements for each algorithm must fit into this type.
*/
typedef uint64 sequence_index_t;
typedef uint64 _SequenceIndex;
/**
* @brief Unsigned integer to index a thread number.
* The maximum thread number (for each processor) must fit into this type.
*/
typedef uint16 thread_index_t;
typedef uint16 _ThreadIndex;
// XXX atomics interface?
/// Longest compare-and-swappable integer type on this platform.
typedef int64 lcas_t;
typedef int64 _CASable;
// XXX numeric_limits::digits?
/// Number of bits of ::lcas_t.
static const int lcas_t_bits = sizeof(lcas_t) * 8;
/// Number of bits of ::_CASable.
static const int _CASable_bits = sizeof(_CASable) * 8;
/// ::lcas_t with the right half of bits set to 1.
static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1);
/// ::_CASable with the right half of bits set to 1.
static const _CASable _CASable_mask = ((_CASable(1) << (_CASable_bits / 2)) - 1);
}
#endif /* _GLIBCXX_PARALLEL_TYPES_H */

View File

@ -38,153 +38,153 @@
namespace __gnu_parallel
{
/** @brief Parallel std::unique_copy(), w/o explicit equality predicate.
* @param first Begin iterator of input sequence.
* @param last End iterator of input sequence.
* @param result Begin iterator of result sequence.
* @param binary_pred Equality predicate.
* @return End iterator of result sequence. */
template<typename InputIterator,
class OutputIterator,
class BinaryPredicate>
OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
OutputIterator result, BinaryPredicate binary_pred)
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result __sequence.
* @param __binary_pred Equality predicate.
* @return End iterator of result __sequence. */
template<typename _IIter,
class _OutputIterator,
class _BinaryPredicate>
_OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result, _BinaryPredicate __binary_pred)
{
_GLIBCXX_CALL(last - first)
_GLIBCXX_CALL(__last - __first)
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type size = last - first;
_DifferenceType size = __last - __first;
if (size == 0)
return result;
return __result;
// Let the first thread process two parts.
difference_type *counter;
difference_type *borders;
_DifferenceType *__counter;
_DifferenceType *__borders;
thread_index_t num_threads = get_max_threads();
_ThreadIndex __num_threads = __get_max_threads();
// First part contains at least one element.
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2];
equally_split(size, num_threads + 1, borders);
counter = new difference_type[num_threads + 1];
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
difference_type begin, end;
_DifferenceType __begin, __end;
// Check for length without duplicates
// Needed for position in output
difference_type i = 0;
OutputIterator out = result;
_DifferenceType __i = 0;
_OutputIterator __out = __result;
if (iam == 0)
if (__iam == 0)
{
begin = borders[0] + 1; // == 1
end = borders[iam + 1];
__begin = __borders[0] + 1; // == 1
__end = __borders[__iam + 1];
++i;
*out++ = *first;
++__i;
*__out++ = *__first;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter-1)))
if (!__binary_pred(*iter, *(iter-1)))
{
++i;
*out++ = *iter;
++__i;
*__out++ = *iter;
}
}
}
else
{
begin = borders[iam]; //one part
end = borders[iam + 1];
__begin = __borders[__iam]; //one part
__end = __borders[__iam + 1];
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter - 1)))
++i;
if (!__binary_pred(*iter, *(iter - 1)))
++__i;
}
}
counter[iam] = i;
__counter[__iam] = __i;
// Last part still untouched.
difference_type begin_output;
_DifferenceType __begin_output;
# pragma omp barrier
// Store result in output on calculated positions.
begin_output = 0;
__begin_output = 0;
if (iam == 0)
if (__iam == 0)
{
for (int t = 0; t < num_threads; ++t)
begin_output += counter[t];
for (int __t = 0; __t < __num_threads; ++__t)
__begin_output += __counter[__t];
i = 0;
__i = 0;
OutputIterator iter_out = result + begin_output;
_OutputIterator __iter_out = __result + __begin_output;
begin = borders[num_threads];
end = size;
__begin = __borders[__num_threads];
__end = size;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (iter == first || !binary_pred(*iter, *(iter - 1)))
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
{
++i;
*iter_out++ = *iter;
++__i;
*__iter_out++ = *iter;
}
}
counter[num_threads] = i;
__counter[__num_threads] = __i;
}
else
{
for (int t = 0; t < iam; t++)
begin_output += counter[t];
for (int __t = 0; __t < __iam; __t++)
__begin_output += __counter[__t];
OutputIterator iter_out = result + begin_output;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
_OutputIterator __iter_out = __result + __begin_output;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter-1)))
*iter_out++ = *iter;
if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter;
}
}
}
difference_type end_output = 0;
for (int t = 0; t < num_threads + 1; t++)
end_output += counter[t];
_DifferenceType __end_output = 0;
for (int __t = 0; __t < __num_threads + 1; __t++)
__end_output += __counter[__t];
delete[] borders;
delete[] __borders;
return result + end_output;
return __result + __end_output;
}
/** @brief Parallel std::unique_copy(), without explicit equality predicate
* @param first Begin iterator of input sequence.
* @param last End iterator of input sequence.
* @param result Begin iterator of result sequence.
* @return End iterator of result sequence. */
template<typename InputIterator, class OutputIterator>
inline OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
OutputIterator result)
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result __sequence.
* @return End iterator of result __sequence. */
template<typename _IIter, class _OutputIterator>
inline _OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result)
{
typedef typename std::iterator_traits<InputIterator>::value_type
value_type;
return parallel_unique_copy(first, last, result,
std::equal_to<value_type>());
typedef typename std::iterator_traits<_IIter>::value_type
_ValueType;
return __parallel_unique_copy(__first, __last, __result,
std::equal_to<_ValueType>());
}
}//namespace __gnu_parallel

View File

@ -49,257 +49,257 @@ namespace __gnu_parallel
#define _GLIBCXX_JOB_VOLATILE volatile
/** @brief One job for a certain thread. */
/** @brief One __job for a certain thread. */
template<typename _DifferenceTp>
struct Job
struct _Job
{
typedef _DifferenceTp difference_type;
typedef _DifferenceTp _DifferenceType;
/** @brief First element.
*
* Changed by owning and stealing thread. By stealing thread,
* always incremented. */
_GLIBCXX_JOB_VOLATILE difference_type first;
_GLIBCXX_JOB_VOLATILE _DifferenceType __first;
/** @brief Last element.
*
* Changed by owning thread only. */
_GLIBCXX_JOB_VOLATILE difference_type last;
_GLIBCXX_JOB_VOLATILE _DifferenceType __last;
/** @brief Number of elements, i. e. @c last-first+1.
/** @brief Number of elements, i.e. @__c __last-__first+1.
*
* Changed by owning thread only. */
_GLIBCXX_JOB_VOLATILE difference_type load;
_GLIBCXX_JOB_VOLATILE _DifferenceType __load;
};
/** @brief Work stealing algorithm for random access iterators.
*
* Uses O(1) additional memory. Synchronization at job lists is
* Uses O(1) additional memory. Synchronization at __job lists is
* done with atomic operations.
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param op User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __op User-supplied functor (comparator, predicate, adding
* functor, ...).
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already
* processed elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already
* processed __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_workstealing(RandomAccessIterator begin,
RandomAccessIterator end,
Op op, Fu& f, Red r,
Result base, Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_workstealing(_RAIter __begin,
_RAIter __end,
_Op __op, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
_GLIBCXX_CALL(end - begin)
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
const _Settings& __s = _Settings::get();
difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);
_DifferenceType __chunk_size = static_cast<_DifferenceType>(__s.workstealing_chunk_size);
// How many jobs?
difference_type length = (bound < 0) ? (end - begin) : bound;
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
// To avoid false sharing in a cache line.
const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
const int __stride = __s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
// Total number of threads currently working.
thread_index_t busy = 0;
_ThreadIndex __busy = 0;
Job<difference_type> *job;
_Job<_DifferenceType> *__job;
omp_lock_t output_lock;
omp_init_lock(&output_lock);
omp_lock_t __output_lock;
omp_init_lock(&__output_lock);
// Write base value to output.
output = base;
// Write __base __value to output.
__output = __base;
// No more threads than jobs, at least one thread.
thread_index_t num_threads =
__gnu_parallel::max<thread_index_t>(1,
__gnu_parallel::min<difference_type>(length, get_max_threads()));
_ThreadIndex __num_threads =
__gnu_parallel::max<_ThreadIndex>(1,
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
# pragma omp parallel shared(busy) num_threads(num_threads)
# pragma omp parallel shared(__busy) num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
__num_threads = omp_get_num_threads();
// Create job description array.
job = new Job<difference_type>[num_threads * stride];
// Create __job description array.
__job = new _Job<_DifferenceType>[__num_threads * __stride];
}
// Initialization phase.
// Flags for every thread if it is doing productive work.
bool iam_working = false;
bool __iam_working = false;
// Thread id.
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// This job.
Job<difference_type>& my_job = job[iam * stride];
// This __job.
_Job<_DifferenceType>& __my_job = __job[__iam * __stride];
// Random number (for work stealing).
thread_index_t victim;
_ThreadIndex __victim;
// Local value for reduction.
Result result = Result();
_Result __result = _Result();
// Number of elements to steal in one attempt.
difference_type steal;
_DifferenceType __steal;
// Every thread has its own random number generator
// (modulo num_threads).
random_number rand_gen(iam, num_threads);
// (modulo __num_threads).
_RandomNumber rand_gen(__iam, __num_threads);
// This thread is currently working.
# pragma omp atomic
++busy;
++__busy;
iam_working = true;
__iam_working = true;
// How many jobs per thread? last thread gets the rest.
my_job.first =
static_cast<difference_type>(iam * (length / num_threads));
__my_job.__first =
static_cast<_DifferenceType>(__iam * (__length / __num_threads));
my_job.last = (iam == (num_threads - 1)) ?
(length - 1) : ((iam + 1) * (length / num_threads) - 1);
my_job.load = my_job.last - my_job.first + 1;
__my_job.__last = (__iam == (__num_threads - 1)) ?
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
__my_job.__load = __my_job.__last - __my_job.__first + 1;
// Init result with first value (to have a base value for reduction).
if (my_job.first <= my_job.last)
// Init __result with __first __value (to have a base value for reduction).
if (__my_job.__first <= __my_job.__last)
{
// Cannot use volatile variable directly.
difference_type my_first = my_job.first;
result = f(op, begin + my_first);
++my_job.first;
--my_job.load;
_DifferenceType __my_first = __my_job.__first;
__result = __f(__op, __begin + __my_first);
++__my_job.__first;
--__my_job.__load;
}
RandomAccessIterator current;
_RAIter __current;
# pragma omp barrier
// Actual work phase
// Work on own or stolen start
while (busy > 0)
// Work on own or stolen __start
while (__busy > 0)
{
// Work until no productive thread left.
# pragma omp flush(busy)
// Work until no productive thread __left.
# pragma omp flush(__busy)
// Thread has own work to do
while (my_job.first <= my_job.last)
while (__my_job.__first <= __my_job.__last)
{
// fetch-and-add call
// Reserve current job block (size chunk_size) in my queue.
difference_type current_job =
fetch_and_add<difference_type>(&(my_job.first), chunk_size);
// Reserve __current __job block (size __chunk_size) in my queue.
_DifferenceType current_job =
__fetch_and_add<_DifferenceType>(&(__my_job.__first), __chunk_size);
// Update load, to make the three values consistent,
// first might have been changed in the meantime
my_job.load = my_job.last - my_job.first + 1;
for (difference_type job_counter = 0;
job_counter < chunk_size && current_job <= my_job.last;
// Update __load, to make the three values consistent,
// __first might have been changed in the meantime
__my_job.__load = __my_job.__last - __my_job.__first + 1;
for (_DifferenceType job_counter = 0;
job_counter < __chunk_size && current_job <= __my_job.__last;
++job_counter)
{
// Yes: process it!
current = begin + current_job;
__current = __begin + current_job;
++current_job;
// Do actual work.
result = r(result, f(op, current));
__result = __r(__result, __f(__op, __current));
}
# pragma omp flush(busy)
# pragma omp flush(__busy)
}
// After reaching this point, a thread's job list is empty.
if (iam_working)
// After reaching this point, a thread's __job list is empty.
if (__iam_working)
{
// This thread no longer has work.
# pragma omp atomic
--busy;
--__busy;
iam_working = false;
__iam_working = false;
}
difference_type supposed_first, supposed_last, supposed_load;
_DifferenceType __supposed_first, __supposed_last, __supposed_load;
do
{
// Find random nonempty deque (not own), do consistency check.
yield();
# pragma omp flush(busy)
victim = rand_gen();
supposed_first = job[victim * stride].first;
supposed_last = job[victim * stride].last;
supposed_load = job[victim * stride].load;
__yield();
# pragma omp flush(__busy)
__victim = rand_gen();
__supposed_first = __job[__victim * __stride].__first;
__supposed_last = __job[__victim * __stride].__last;
__supposed_load = __job[__victim * __stride].__load;
}
while (busy > 0
&& ((supposed_load <= 0)
|| ((supposed_first + supposed_load - 1) != supposed_last)));
while (__busy > 0
&& ((__supposed_load <= 0)
|| ((__supposed_first + __supposed_load - 1) != __supposed_last)));
if (busy == 0)
if (__busy == 0)
break;
if (supposed_load > 0)
if (__supposed_load > 0)
{
// Has work and work to do.
// Number of elements to steal (at least one).
steal = (supposed_load < 2) ? 1 : supposed_load / 2;
__steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
// Push victim's start forward.
difference_type stolen_first =
fetch_and_add<difference_type>(
&(job[victim * stride].first), steal);
difference_type stolen_try =
stolen_first + steal - difference_type(1);
// Push __victim's __start forward.
_DifferenceType __stolen_first =
__fetch_and_add<_DifferenceType>(
&(__job[__victim * __stride].__first), __steal);
_DifferenceType stolen_try =
__stolen_first + __steal - _DifferenceType(1);
my_job.first = stolen_first;
my_job.last = __gnu_parallel::min(stolen_try, supposed_last);
my_job.load = my_job.last - my_job.first + 1;
__my_job.__first = __stolen_first;
__my_job.__last = __gnu_parallel::min(stolen_try, __supposed_last);
__my_job.__load = __my_job.__last - __my_job.__first + 1;
// Has potential work again.
# pragma omp atomic
++busy;
iam_working = true;
++__busy;
__iam_working = true;
# pragma omp flush(busy)
# pragma omp flush(__busy)
}
# pragma omp flush(busy)
} // end while busy > 0
// Add accumulated result to output.
omp_set_lock(&output_lock);
output = r(output, result);
omp_unset_lock(&output_lock);
# pragma omp flush(__busy)
} // end while __busy > 0
// Add accumulated __result to output.
omp_set_lock(&__output_lock);
__output = __r(__output, __result);
omp_unset_lock(&__output_lock);
}
delete[] job;
delete[] __job;
// Points to last element processed (needed as return value for
// some algorithms like transform)
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
omp_destroy_lock(&output_lock);
omp_destroy_lock(&__output_lock);
return op;
return __op;
}
} // end namespace