mirror of git://gcc.gnu.org/git/gcc.git
algobase.h: Uglify internal identifiers.
2009-09-16 Johannes Singler <singler@ira.uka.de>
* include/parallel/algobase.h: Uglify internal identifiers.
* include/parallel/algo.h: Likewise.
* include/parallel/algorithmfwd.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/compatibility.h: Likewise.
* include/parallel/compiletime_settings.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/features.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/find_selectors.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/for_each_selectors.h: Likewise.
* include/parallel/iterator.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/multiway_merge.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/numericfwd.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/random_number.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/settings.h: Likewise.
* include/parallel/sort.h: Likewise.
* include/parallel/tags.h: Likewise.
* include/parallel/types.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/workstealing.h: Likewise.
From-SVN: r151741
This commit is contained in:
parent
4075e7e8dc
commit
1acba85b37
|
|
@ -1,3 +1,49 @@
|
|||
2009-09-16 Johannes Singler <singler@ira.uka.de>
|
||||
|
||||
* include/parallel/algobase.h: Uglify internal identifiers.
|
||||
* include/parallel/algo.h: Likewise.
|
||||
* include/parallel/algorithm: Likewise.
|
||||
* include/parallel/algorithmfwd.h: Likewise.
|
||||
* include/parallel/balanced_quicksort.h: Likewise.
|
||||
* include/parallel/base.h: Likewise.
|
||||
* include/parallel/basic_iterator.h: Likewise.
|
||||
* include/parallel/checkers.h: Likewise.
|
||||
* include/parallel/compatibility.h: Likewise.
|
||||
* include/parallel/compiletime_settings.h: Likewise.
|
||||
* include/parallel/equally_split.h: Likewise.
|
||||
* include/parallel/features.h: Likewise.
|
||||
* include/parallel/find.h: Likewise.
|
||||
* include/parallel/find_selectors.h: Likewise.
|
||||
* include/parallel/for_each.h: Likewise.
|
||||
* include/parallel/for_each_selectors.h: Likewise.
|
||||
* include/parallel/iterator.h: Likewise.
|
||||
* include/parallel/list_partition.h: Likewise.
|
||||
* include/parallel/losertree.h: Likewise.
|
||||
* include/parallel/merge.h: Likewise.
|
||||
* include/parallel/multiseq_selection.h: Likewise.
|
||||
* include/parallel/multiway_merge.h: Likewise.
|
||||
* include/parallel/multiway_mergesort.h: Likewise.
|
||||
* include/parallel/numeric: Likewise.
|
||||
* include/parallel/numericfwd.h: Likewise.
|
||||
* include/parallel/omp_loop.h: Likewise.
|
||||
* include/parallel/omp_loop_static.h: Likewise.
|
||||
* include/parallel/parallel.h: Likewise.
|
||||
* include/parallel/par_loop.h: Likewise.
|
||||
* include/parallel/partial_sum.h: Likewise.
|
||||
* include/parallel/partition.h: Likewise.
|
||||
* include/parallel/queue.h: Likewise.
|
||||
* include/parallel/quicksort.h: Likewise.
|
||||
* include/parallel/random_number.h: Likewise.
|
||||
* include/parallel/random_shuffle.h: Likewise.
|
||||
* include/parallel/search.h: Likewise.
|
||||
* include/parallel/set_operations.h: Likewise.
|
||||
* include/parallel/settings.h: Likewise.
|
||||
* include/parallel/sort.h: Likewise.
|
||||
* include/parallel/tags.h: Likewise.
|
||||
* include/parallel/types.h: Likewise.
|
||||
* include/parallel/unique_copy.h: Likewise.
|
||||
* include/parallel/workstealing.h: Likewise.
|
||||
|
||||
2009-09-14 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR libstdc++/41037
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -50,230 +50,230 @@ namespace __parallel
|
|||
// NB: equal and lexicographical_compare require mismatch.
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
inline pair<InputIterator1, InputIterator2>
|
||||
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline pair<_IIter1, _IIter2>
|
||||
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2); }
|
||||
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2); }
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
inline pair<InputIterator1, InputIterator2>
|
||||
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
Predicate pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline pair<_IIter1, _IIter2>
|
||||
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
_Predicate __pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred); }
|
||||
|
||||
// Sequential fallback for input iterator case
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate, typename IteratorTag1, typename IteratorTag2>
|
||||
inline pair<InputIterator1, InputIterator2>
|
||||
mismatch_switch(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, Predicate pred, IteratorTag1,
|
||||
IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate, typename _IteratorTag1, typename _IteratorTag2>
|
||||
inline pair<_IIter1, _IIter2>
|
||||
__mismatch_switch(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _Predicate __pred, _IteratorTag1,
|
||||
_IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred); }
|
||||
|
||||
// Parallel mismatch for random access iterators
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Predicate>
|
||||
pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
mismatch_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Predicate pred,
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Predicate>
|
||||
pair<_RAIter1, _RAIter2>
|
||||
__mismatch_switch(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Predicate __pred,
|
||||
random_access_iterator_tag, random_access_iterator_tag)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(true))
|
||||
{
|
||||
RandomAccessIterator1 res =
|
||||
__gnu_parallel::find_template(begin1, end1, begin2, pred,
|
||||
_RAIter1 __res =
|
||||
__gnu_parallel::__find_template(__begin1, __end1, __begin2, __pred,
|
||||
__gnu_parallel::
|
||||
mismatch_selector()).first;
|
||||
return make_pair(res , begin2 + (res - begin1));
|
||||
__mismatch_selector()).first;
|
||||
return make_pair(__res , __begin2 + (__res - __begin1));
|
||||
}
|
||||
else
|
||||
return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred);
|
||||
return _GLIBCXX_STD_P::mismatch(__begin1, __end1, __begin2, __pred);
|
||||
}
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
inline pair<InputIterator1, InputIterator2>
|
||||
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline pair<_IIter1, _IIter2>
|
||||
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator1> iterator1_traits;
|
||||
typedef std::iterator_traits<InputIterator2> iterator2_traits;
|
||||
typedef typename iterator1_traits::value_type value1_type;
|
||||
typedef typename iterator2_traits::value_type value2_type;
|
||||
typedef typename iterator1_traits::iterator_category iterator1_category;
|
||||
typedef typename iterator2_traits::iterator_category iterator2_category;
|
||||
typedef std::iterator_traits<_IIter1> iterator1_traits;
|
||||
typedef std::iterator_traits<_IIter2> iterator2_traits;
|
||||
typedef typename iterator1_traits::value_type _ValueType1;
|
||||
typedef typename iterator2_traits::value_type _ValueType2;
|
||||
typedef typename iterator1_traits::iterator_category _IteratorCategory1;
|
||||
typedef typename iterator2_traits::iterator_category _IteratorCategory2;
|
||||
|
||||
typedef __gnu_parallel::equal_to<value1_type, value2_type> equal_to_type;
|
||||
typedef __gnu_parallel::equal_to<_ValueType1, _ValueType2> equal_to_type;
|
||||
|
||||
return mismatch_switch(begin1, end1, begin2, equal_to_type(),
|
||||
iterator1_category(), iterator2_category());
|
||||
return __mismatch_switch(__begin1, __end1, __begin2, equal_to_type(),
|
||||
_IteratorCategory1(), _IteratorCategory2());
|
||||
}
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
inline pair<InputIterator1, InputIterator2>
|
||||
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
Predicate pred)
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline pair<_IIter1, _IIter2>
|
||||
mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
_Predicate __pred)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator1> iterator1_traits;
|
||||
typedef std::iterator_traits<InputIterator2> iterator2_traits;
|
||||
typedef typename iterator1_traits::iterator_category iterator1_category;
|
||||
typedef typename iterator2_traits::iterator_category iterator2_category;
|
||||
typedef std::iterator_traits<_IIter1> iterator1_traits;
|
||||
typedef std::iterator_traits<_IIter2> iterator2_traits;
|
||||
typedef typename iterator1_traits::iterator_category _IteratorCategory1;
|
||||
typedef typename iterator2_traits::iterator_category _IteratorCategory2;
|
||||
|
||||
return mismatch_switch(begin1, end1, begin2, pred, iterator1_category(),
|
||||
iterator2_category());
|
||||
return __mismatch_switch(__begin1, __end1, __begin2, __pred, _IteratorCategory1(),
|
||||
_IteratorCategory2());
|
||||
}
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline bool
|
||||
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::equal(begin1, end1, begin2); }
|
||||
{ return _GLIBCXX_STD_P::equal(__begin1, __end1, __begin2); }
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline bool
|
||||
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
Predicate pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::equal(begin1, end1, begin2, pred); }
|
||||
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
_Predicate __pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::equal(__begin1, __end1, __begin2, __pred); }
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline bool
|
||||
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
|
||||
{ return mismatch(begin1, end1, begin2).first == end1; }
|
||||
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2)
|
||||
{ return mismatch(__begin1, __end1, __begin2).first == __end1; }
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline bool
|
||||
equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
|
||||
Predicate pred)
|
||||
{ return mismatch(begin1, end1, begin2, pred).first == end1; }
|
||||
equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2,
|
||||
_Predicate __pred)
|
||||
{ return mismatch(__begin1, __end1, __begin2, __pred).first == __end1; }
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline bool
|
||||
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, InputIterator2 end2,
|
||||
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _IIter2 __end2,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
|
||||
begin2, end2); }
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
|
||||
__begin2, __end2); }
|
||||
|
||||
// Sequential fallback
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline bool
|
||||
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, InputIterator2 end2,
|
||||
Predicate pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
|
||||
begin2, end2, pred); }
|
||||
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _IIter2 __end2,
|
||||
_Predicate __pred, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
|
||||
__begin2, __end2, __pred); }
|
||||
|
||||
// Sequential fallback for input iterator case
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate, typename IteratorTag1, typename IteratorTag2>
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate, typename _IteratorTag1, typename _IteratorTag2>
|
||||
inline bool
|
||||
lexicographical_compare_switch(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, InputIterator2 end2,
|
||||
Predicate pred, IteratorTag1, IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
|
||||
begin2, end2, pred); }
|
||||
__lexicographical_compare_switch(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _IIter2 __end2,
|
||||
_Predicate __pred, _IteratorTag1, _IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
|
||||
__begin2, __end2, __pred); }
|
||||
|
||||
// Parallel lexicographical_compare for random access iterators
|
||||
// Limitation: Both valuetypes must be the same
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Predicate>
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Predicate>
|
||||
bool
|
||||
lexicographical_compare_switch(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2,
|
||||
RandomAccessIterator2 end2, Predicate pred,
|
||||
__lexicographical_compare_switch(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2,
|
||||
_RAIter2 __end2, _Predicate __pred,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(true))
|
||||
{
|
||||
typedef iterator_traits<RandomAccessIterator1> traits1_type;
|
||||
typedef typename traits1_type::value_type value1_type;
|
||||
typedef iterator_traits<_RAIter1> _TraitsType1;
|
||||
typedef typename _TraitsType1::value_type _ValueType1;
|
||||
|
||||
typedef iterator_traits<RandomAccessIterator2> traits2_type;
|
||||
typedef typename traits2_type::value_type value2_type;
|
||||
typedef iterator_traits<_RAIter2> _TraitsType2;
|
||||
typedef typename _TraitsType2::value_type _ValueType2;
|
||||
|
||||
typedef __gnu_parallel::equal_from_less<Predicate, value1_type,
|
||||
value2_type> equal_type;
|
||||
typedef __gnu_parallel::_EqualFromLess<_Predicate, _ValueType1,
|
||||
_ValueType2> _EqualFromLessCompare;
|
||||
|
||||
// Longer sequence in first place.
|
||||
if ((end1 - begin1) < (end2 - begin2))
|
||||
if ((__end1 - __begin1) < (__end2 - __begin2))
|
||||
{
|
||||
typedef pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
pair_type;
|
||||
pair_type mm = mismatch_switch(begin1, end1, begin2,
|
||||
equal_type(pred),
|
||||
typedef pair<_RAIter1, _RAIter2>
|
||||
_SpotType;
|
||||
_SpotType __mm = __mismatch_switch(__begin1, __end1, __begin2,
|
||||
_EqualFromLessCompare(__pred),
|
||||
random_access_iterator_tag(),
|
||||
random_access_iterator_tag());
|
||||
|
||||
return (mm.first == end1) || bool(pred(*mm.first, *mm.second));
|
||||
return (__mm.first == __end1) || bool(__pred(*__mm.first, *__mm.second));
|
||||
}
|
||||
else
|
||||
{
|
||||
typedef pair<RandomAccessIterator2, RandomAccessIterator1>
|
||||
pair_type;
|
||||
pair_type mm = mismatch_switch(begin2, end2, begin1,
|
||||
equal_type(pred),
|
||||
typedef pair<_RAIter2, _RAIter1>
|
||||
_SpotType;
|
||||
_SpotType __mm = __mismatch_switch(__begin2, __end2, __begin1,
|
||||
_EqualFromLessCompare(__pred),
|
||||
random_access_iterator_tag(),
|
||||
random_access_iterator_tag());
|
||||
|
||||
return (mm.first != end2) && bool(pred(*mm.second, *mm.first));
|
||||
return (__mm.first != __end2) && bool(__pred(*__mm.second, *__mm.first));
|
||||
}
|
||||
}
|
||||
else
|
||||
return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1,
|
||||
begin2, end2, pred);
|
||||
return _GLIBCXX_STD_P::lexicographical_compare(__begin1, __end1,
|
||||
__begin2, __end2, __pred);
|
||||
}
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2>
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
inline bool
|
||||
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, InputIterator2 end2)
|
||||
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _IIter2 __end2)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits1_type;
|
||||
typedef typename traits1_type::value_type value1_type;
|
||||
typedef typename traits1_type::iterator_category iterator1_category;
|
||||
typedef iterator_traits<_IIter1> _TraitsType1;
|
||||
typedef typename _TraitsType1::value_type _ValueType1;
|
||||
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
|
||||
|
||||
typedef iterator_traits<InputIterator2> traits2_type;
|
||||
typedef typename traits2_type::value_type value2_type;
|
||||
typedef typename traits2_type::iterator_category iterator2_category;
|
||||
typedef __gnu_parallel::less<value1_type, value2_type> less_type;
|
||||
typedef iterator_traits<_IIter2> _TraitsType2;
|
||||
typedef typename _TraitsType2::value_type _ValueType2;
|
||||
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
|
||||
typedef __gnu_parallel::_Less<_ValueType1, _ValueType2> _LessType;
|
||||
|
||||
return lexicographical_compare_switch(begin1, end1, begin2, end2,
|
||||
less_type(), iterator1_category(),
|
||||
iterator2_category());
|
||||
return __lexicographical_compare_switch(__begin1, __end1, __begin2, __end2,
|
||||
_LessType(), _IteratorCategory1(),
|
||||
_IteratorCategory2());
|
||||
}
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator1, typename InputIterator2,
|
||||
typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate>
|
||||
inline bool
|
||||
lexicographical_compare(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, InputIterator2 end2,
|
||||
Predicate pred)
|
||||
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1,
|
||||
_IIter2 __begin2, _IIter2 __end2,
|
||||
_Predicate __pred)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits1_type;
|
||||
typedef typename traits1_type::iterator_category iterator1_category;
|
||||
typedef iterator_traits<_IIter1> _TraitsType1;
|
||||
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
|
||||
|
||||
typedef iterator_traits<InputIterator2> traits2_type;
|
||||
typedef typename traits2_type::iterator_category iterator2_category;
|
||||
typedef iterator_traits<_IIter2> _TraitsType2;
|
||||
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
|
||||
|
||||
return lexicographical_compare_switch(begin1, end1, begin2, end2, pred,
|
||||
iterator1_category(),
|
||||
iterator2_category());
|
||||
return __lexicographical_compare_switch(__begin1, __end1, __begin2, __end2, __pred,
|
||||
_IteratorCategory1(),
|
||||
_IteratorCategory2());
|
||||
}
|
||||
} // end namespace
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -48,11 +48,11 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _IterTag>
|
||||
_FIter
|
||||
adjacent_find_switch(_FIter, _FIter, _IterTag);
|
||||
__adjacent_find_switch(_FIter, _FIter, _IterTag);
|
||||
|
||||
template<typename _RAIter>
|
||||
_RAIter
|
||||
adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag);
|
||||
__adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag);
|
||||
|
||||
|
||||
template<typename _FIter, typename _BiPredicate>
|
||||
|
|
@ -66,11 +66,11 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _BiPredicate, typename _IterTag>
|
||||
_FIter
|
||||
adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag);
|
||||
__adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _BiPredicate>
|
||||
_RAIter
|
||||
adjacent_find_switch(_RAIter, _RAIter, _BiPredicate,
|
||||
__adjacent_find_switch(_RAIter, _RAIter, _BiPredicate,
|
||||
random_access_iterator_tag);
|
||||
|
||||
|
||||
|
|
@ -88,12 +88,12 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Tp, typename _IterTag>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
count_switch(_IIter, _IIter, const _Tp&, _IterTag);
|
||||
__count_switch(_IIter, _IIter, const _Tp&, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Tp>
|
||||
typename iterator_traits<_RAIter>::difference_type
|
||||
count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_unbalanced);
|
||||
|
||||
|
||||
|
|
@ -111,12 +111,12 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Predicate, typename _IterTag>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
count_if_switch(_IIter, _IIter, _Predicate, _IterTag);
|
||||
__count_if_switch(_IIter, _IIter, _Predicate, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
typename iterator_traits<_RAIter>::difference_type
|
||||
count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_unbalanced);
|
||||
|
||||
// algobase.h
|
||||
|
|
@ -124,18 +124,18 @@ namespace __parallel
|
|||
bool
|
||||
equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2, typename _Predicate>
|
||||
bool
|
||||
equal(_IIter1, _IIter1, _IIter2, Predicate,
|
||||
equal(_IIter1, _IIter1, _IIter2, _Predicate,
|
||||
__gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
bool
|
||||
equal(_IIter1, _IIter1, _IIter2);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename Predicate>
|
||||
template<typename _IIter1, typename _IIter2, typename _Predicate>
|
||||
bool
|
||||
equal(_IIter1, _IIter1, _IIter2, Predicate);
|
||||
equal(_IIter1, _IIter1, _IIter2, _Predicate);
|
||||
|
||||
template<typename _IIter, typename _Tp>
|
||||
_IIter
|
||||
|
|
@ -143,15 +143,15 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Tp>
|
||||
_IIter
|
||||
find(_IIter, _IIter, const _Tp& val);
|
||||
find(_IIter, _IIter, const _Tp& __val);
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _IterTag>
|
||||
_IIter
|
||||
find_switch(_IIter, _IIter, const _Tp&, _IterTag);
|
||||
__find_switch(_IIter, _IIter, const _Tp&, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Tp>
|
||||
_RAIter
|
||||
find_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag);
|
||||
__find_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag);
|
||||
|
||||
template<typename _IIter, typename _Predicate>
|
||||
_IIter
|
||||
|
|
@ -163,11 +163,11 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Predicate, typename _IterTag>
|
||||
_IIter
|
||||
find_if_switch(_IIter, _IIter, _Predicate, _IterTag);
|
||||
__find_if_switch(_IIter, _IIter, _Predicate, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
_RAIter
|
||||
find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
|
||||
__find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
|
||||
|
||||
template<typename _IIter, typename _FIter>
|
||||
_IIter
|
||||
|
|
@ -190,18 +190,18 @@ namespace __parallel
|
|||
template<typename _IIter, typename _FIter,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
_IIter
|
||||
find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2);
|
||||
__find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2);
|
||||
|
||||
template<typename _RAIter, typename _FIter, typename _BiPredicate,
|
||||
typename _IterTag>
|
||||
_RAIter
|
||||
find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate,
|
||||
__find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate,
|
||||
random_access_iterator_tag, _IterTag);
|
||||
|
||||
template<typename _IIter, typename _FIter, typename _BiPredicate,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
_IIter
|
||||
find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate,
|
||||
__find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate,
|
||||
_IterTag1, _IterTag2);
|
||||
|
||||
|
||||
|
|
@ -219,12 +219,12 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Function, typename _IterTag>
|
||||
_Function
|
||||
for_each_switch(_IIter, _IIter, _Function, _IterTag);
|
||||
__for_each_switch(_IIter, _IIter, _Function, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Function>
|
||||
_Function
|
||||
for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
|
||||
|
|
@ -242,12 +242,12 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Generator, typename _IterTag>
|
||||
void
|
||||
generate_switch(_FIter, _FIter, _Generator, _IterTag);
|
||||
__generate_switch(_FIter, _FIter, _Generator, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Generator>
|
||||
void
|
||||
generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
template<typename _OIter, typename _Size, typename _Generator>
|
||||
|
|
@ -265,12 +265,12 @@ namespace __parallel
|
|||
template<typename _OIter, typename _Size, typename _Generator,
|
||||
typename _IterTag>
|
||||
_OIter
|
||||
generate_n_switch(_OIter, _Size, _Generator, _IterTag);
|
||||
__generate_n_switch(_OIter, _Size, _Generator, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Size, typename _Generator>
|
||||
_RAIter
|
||||
generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
|
|
@ -294,12 +294,12 @@ namespace __parallel
|
|||
template<typename _IIter1, typename _IIter2,
|
||||
typename _Predicate, typename _IterTag1, typename _IterTag2>
|
||||
bool
|
||||
lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2,
|
||||
__lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2,
|
||||
_Predicate, _IterTag1, _IterTag2);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Predicate>
|
||||
bool
|
||||
lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
__lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
_Predicate, random_access_iterator_tag,
|
||||
random_access_iterator_tag);
|
||||
|
||||
|
|
@ -324,12 +324,12 @@ namespace __parallel
|
|||
template<typename _IIter1, typename _IIter2, typename _Predicate,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
pair<_IIter1, _IIter2>
|
||||
mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate,
|
||||
__mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate,
|
||||
_IterTag1, _IterTag2);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Predicate>
|
||||
pair<_RAIter1, _RAIter2>
|
||||
mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate,
|
||||
__mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
|
||||
template<typename _FIter1, typename _FIter2>
|
||||
|
|
@ -351,23 +351,23 @@ namespace __parallel
|
|||
|
||||
template<typename _RAIter1, typename _RAIter2>
|
||||
_RAIter1
|
||||
search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
__search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
|
||||
template<typename _FIter1, typename _FIter2, typename _IterTag1,
|
||||
typename _IterTag2>
|
||||
_FIter1
|
||||
search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2);
|
||||
__search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _BiPredicate>
|
||||
_RAIter1
|
||||
search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate,
|
||||
__search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
|
||||
template<typename _FIter1, typename _FIter2, typename _BiPredicate,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
_FIter1
|
||||
search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate,
|
||||
__search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate,
|
||||
_IterTag1, _IterTag2);
|
||||
|
||||
template<typename _FIter, typename _Integer, typename _Tp>
|
||||
|
|
@ -393,42 +393,42 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Integer, typename _Tp,
|
||||
typename _BiPredicate>
|
||||
_RAIter
|
||||
search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&,
|
||||
__search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&,
|
||||
_BiPredicate, random_access_iterator_tag);
|
||||
|
||||
template<typename _FIter, typename _Integer, typename _Tp,
|
||||
typename _BiPredicate, typename _IterTag>
|
||||
_FIter
|
||||
search_n_switch(_FIter, _FIter, _Integer, const _Tp&,
|
||||
__search_n_switch(_FIter, _FIter, _Integer, const _Tp&,
|
||||
_BiPredicate, _IterTag);
|
||||
|
||||
|
||||
template<typename _IIter, typename _OIter, typename UnaryOperation>
|
||||
template<typename _IIter, typename _OIter, typename _UnaryOperation>
|
||||
_OIter
|
||||
transform(_IIter, _IIter, _OIter, UnaryOperation);
|
||||
transform(_IIter, _IIter, _OIter, _UnaryOperation);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename UnaryOperation>
|
||||
template<typename _IIter, typename _OIter, typename _UnaryOperation>
|
||||
_OIter
|
||||
transform(_IIter, _IIter, _OIter, UnaryOperation,
|
||||
transform(_IIter, _IIter, _OIter, _UnaryOperation,
|
||||
__gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename UnaryOperation>
|
||||
template<typename _IIter, typename _OIter, typename _UnaryOperation>
|
||||
_OIter
|
||||
transform(_IIter, _IIter, _OIter, UnaryOperation,
|
||||
transform(_IIter, _IIter, _OIter, _UnaryOperation,
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename UnaryOperation,
|
||||
template<typename _IIter, typename _OIter, typename _UnaryOperation,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
_OIter
|
||||
transform1_switch(_IIter, _IIter, _OIter, UnaryOperation,
|
||||
__transform1_switch(_IIter, _IIter, _OIter, _UnaryOperation,
|
||||
_IterTag1, _IterTag2);
|
||||
|
||||
|
||||
template<typename _RAIIter, typename _RAOIter, typename UnaryOperation>
|
||||
template<typename _RAIIter, typename _RAOIter, typename _UnaryOperation>
|
||||
_RAOIter
|
||||
transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation,
|
||||
__transform1_switch(_RAIIter, _RAIIter, _RAOIter, _UnaryOperation,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
|
||||
|
|
@ -452,17 +452,17 @@ namespace __parallel
|
|||
template<typename _RAIter1, typename _RAIter2, typename _RAIter3,
|
||||
typename _BiOperation>
|
||||
_RAIter3
|
||||
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
|
||||
__transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter,
|
||||
typename _BiOperation, typename _Tag1,
|
||||
typename _Tag2, typename _Tag3>
|
||||
_OIter
|
||||
transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
|
||||
__transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
|
||||
_Tag1, _Tag2, _Tag3);
|
||||
|
||||
|
||||
|
|
@ -482,11 +482,11 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Tp, typename _IterTag>
|
||||
void
|
||||
replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag);
|
||||
__replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Tp>
|
||||
void
|
||||
replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
|
||||
__replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
|
||||
random_access_iterator_tag, __gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
|
|
@ -507,11 +507,11 @@ namespace __parallel
|
|||
template<typename _FIter, typename _Predicate, typename _Tp,
|
||||
typename _IterTag>
|
||||
void
|
||||
replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag);
|
||||
__replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Predicate, typename _Tp>
|
||||
void
|
||||
replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
|
||||
__replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
|
@ -542,12 +542,12 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Compare, typename _IterTag>
|
||||
_FIter
|
||||
max_element_switch(_FIter, _FIter, _Compare, _IterTag);
|
||||
__max_element_switch(_FIter, _FIter, _Compare, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Compare>
|
||||
_RAIter
|
||||
max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
|
||||
|
|
@ -575,13 +575,13 @@ namespace __parallel
|
|||
typename _Compare, typename _IterTag1, typename _IterTag2,
|
||||
typename _IterTag3>
|
||||
_OIter
|
||||
merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
|
||||
__merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
|
||||
_IterTag1, _IterTag2, _IterTag3);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter,
|
||||
typename _Compare>
|
||||
_OIter
|
||||
merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
|
||||
__merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
random_access_iterator_tag);
|
||||
|
||||
|
|
@ -596,7 +596,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter>
|
||||
_FIter
|
||||
min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag);
|
||||
min_element(_FIter, _FIter, __gnu_parallel::_Parallelism __parallelism_tag);
|
||||
|
||||
template<typename _FIter, typename _Compare>
|
||||
_FIter
|
||||
|
|
@ -612,12 +612,12 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Compare, typename _IterTag>
|
||||
_FIter
|
||||
min_element_switch(_FIter, _FIter, _Compare, _IterTag);
|
||||
__min_element_switch(_FIter, _FIter, _Compare, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename _Compare>
|
||||
_RAIter
|
||||
min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_balanced);
|
||||
|
||||
template<typename _RAIter>
|
||||
|
|
@ -654,21 +654,21 @@ namespace __parallel
|
|||
void
|
||||
partial_sort(_RAIter, _RAIter, _RAIter);
|
||||
|
||||
template<typename _FIter, typename Predicate>
|
||||
template<typename _FIter, typename _Predicate>
|
||||
_FIter
|
||||
partition(_FIter, _FIter, Predicate, __gnu_parallel::sequential_tag);
|
||||
partition(_FIter, _FIter, _Predicate, __gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _FIter, typename Predicate>
|
||||
template<typename _FIter, typename _Predicate>
|
||||
_FIter
|
||||
partition(_FIter, _FIter, Predicate);
|
||||
partition(_FIter, _FIter, _Predicate);
|
||||
|
||||
template<typename _FIter, typename Predicate, typename _IterTag>
|
||||
template<typename _FIter, typename _Predicate, typename _IterTag>
|
||||
_FIter
|
||||
partition_switch(_FIter, _FIter, Predicate, _IterTag);
|
||||
__partition_switch(_FIter, _FIter, _Predicate, _IterTag);
|
||||
|
||||
template<typename _RAIter, typename Predicate>
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
_RAIter
|
||||
partition_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag);
|
||||
__partition_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag);
|
||||
|
||||
template<typename _RAIter>
|
||||
void
|
||||
|
|
@ -693,9 +693,9 @@ namespace __parallel
|
|||
__gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter,
|
||||
typename Predicate>
|
||||
typename _Predicate>
|
||||
_OIter
|
||||
set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate,
|
||||
set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate,
|
||||
__gnu_parallel::sequential_tag);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter>
|
||||
|
|
@ -711,13 +711,13 @@ namespace __parallel
|
|||
typename _OIter, typename _IterTag1, typename _IterTag2,
|
||||
typename _IterTag3>
|
||||
_OIter
|
||||
set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
__set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
_Predicate, _IterTag1, _IterTag2, _IterTag3);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
|
||||
typename _Predicate>
|
||||
_Output_RAIter
|
||||
set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter,
|
||||
__set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter,
|
||||
_Predicate, random_access_iterator_tag,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
|
||||
|
|
@ -745,13 +745,13 @@ namespace __parallel
|
|||
typename _OIter, typename _IterTag1, typename _IterTag2,
|
||||
typename _IterTag3>
|
||||
_OIter
|
||||
set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
__set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
_Predicate, _IterTag1, _IterTag2, _IterTag3);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
|
||||
typename _Predicate>
|
||||
_Output_RAIter
|
||||
set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
__set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
_Output_RAIter, _Predicate,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
|
|
@ -782,14 +782,14 @@ namespace __parallel
|
|||
typename _OIter, typename _IterTag1, typename _IterTag2,
|
||||
typename _IterTag3>
|
||||
_OIter
|
||||
set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2,
|
||||
__set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2,
|
||||
_OIter, _Predicate, _IterTag1, _IterTag2,
|
||||
_IterTag3);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
|
||||
typename _Predicate>
|
||||
_Output_RAIter
|
||||
set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
__set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
_Output_RAIter, _Predicate,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
|
|
@ -820,13 +820,13 @@ namespace __parallel
|
|||
typename _OIter, typename _IterTag1, typename _IterTag2,
|
||||
typename _IterTag3>
|
||||
_OIter
|
||||
set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
__set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter,
|
||||
_Predicate, _IterTag1, _IterTag2, _IterTag3);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter,
|
||||
typename _Predicate>
|
||||
_Output_RAIter
|
||||
set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
__set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2,
|
||||
_Output_RAIter, _Predicate,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
|
|
@ -885,12 +885,12 @@ namespace __parallel
|
|||
template<typename _IIter, typename _OIter, typename _Predicate,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
_OIter
|
||||
unique_copy_switch(_IIter, _IIter, _OIter, _Predicate,
|
||||
__unique_copy_switch(_IIter, _IIter, _OIter, _Predicate,
|
||||
_IterTag1, _IterTag2);
|
||||
|
||||
template<typename _RAIter, typename _RandomAccess_OIter, typename _Predicate>
|
||||
_RandomAccess_OIter
|
||||
unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate,
|
||||
__unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
} // end namespace __parallel
|
||||
} // end namespace std
|
||||
|
|
|
|||
|
|
@ -58,171 +58,171 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Information local to one thread in the parallel quicksort run. */
|
||||
template<typename RandomAccessIterator>
|
||||
struct QSBThreadLocal
|
||||
template<typename _RAIter>
|
||||
struct _QSBThreadLocal
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
/** @brief Continuous part of the sequence, described by an
|
||||
iterator pair. */
|
||||
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
/** @brief Initial piece to work on. */
|
||||
Piece initial;
|
||||
_Piece _M_initial;
|
||||
|
||||
/** @brief Work-stealing queue. */
|
||||
RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
|
||||
_RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
|
||||
|
||||
/** @brief Number of threads involved in this algorithm. */
|
||||
thread_index_t num_threads;
|
||||
_ThreadIndex __num_threads;
|
||||
|
||||
/** @brief Pointer to a counter of elements left over to sort. */
|
||||
volatile difference_type* elements_leftover;
|
||||
volatile _DifferenceType* _M_elements_leftover;
|
||||
|
||||
/** @brief The complete sequence to sort. */
|
||||
Piece global;
|
||||
_Piece _M_global;
|
||||
|
||||
/** @brief Constructor.
|
||||
* @param queue_size Size of the work-stealing queue. */
|
||||
QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
|
||||
* @param __queue_size Size of the work-stealing queue. */
|
||||
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
|
||||
};
|
||||
|
||||
/** @brief Balanced quicksort divide step.
|
||||
* @param begin Begin iterator of subsequence.
|
||||
* @param end End iterator of subsequence.
|
||||
* @param comp Comparator.
|
||||
* @param num_threads Number of threads that are allowed to work on
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
* @pre @c (end-begin)>=1 */
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
typename std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, thread_index_t num_threads)
|
||||
* @pre @c (__end-__begin)>=1 */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__qsb_divide(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
RandomAccessIterator pivot_pos =
|
||||
median_of_three_iterators(begin, begin + (end - begin) / 2,
|
||||
end - 1, comp);
|
||||
_RAIter __pivot_pos =
|
||||
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
|
||||
__end - 1, __comp);
|
||||
|
||||
#if defined(_GLIBCXX_ASSERTIONS)
|
||||
// Must be in between somewhere.
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
(!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
|
||||
|| (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
|
||||
|| (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
|
||||
|| (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
|
||||
|| (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
|
||||
|| (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
|
||||
(!__comp(*__pivot_pos, *__begin) && !__comp(*(__begin + __n / 2), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *__begin) && !__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*(__begin + __n / 2), *__pivot_pos)));
|
||||
#endif
|
||||
|
||||
// Swap pivot value to end.
|
||||
if (pivot_pos != (end - 1))
|
||||
std::swap(*pivot_pos, *(end - 1));
|
||||
pivot_pos = end - 1;
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
|
||||
pred(comp, *pivot_pos);
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, returning end - begin - 1 in the worst case.
|
||||
difference_type split_pos = parallel_partition(
|
||||
begin, end - 1, pred, num_threads);
|
||||
// Divide, returning __end - __begin - 1 in the worst case.
|
||||
_DifferenceType __split_pos = __parallel_partition(
|
||||
__begin, __end - 1, __pred, __num_threads);
|
||||
|
||||
// Swap back pivot to middle.
|
||||
std::swap(*(begin + split_pos), *pivot_pos);
|
||||
pivot_pos = begin + split_pos;
|
||||
std::swap(*(__begin + __split_pos), *__pivot_pos);
|
||||
__pivot_pos = __begin + __split_pos;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
RandomAccessIterator r;
|
||||
for (r = begin; r != pivot_pos; ++r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
|
||||
for (; r != end; ++r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
|
||||
_RAIter __r;
|
||||
for (__r = __begin; __r != __pivot_pos; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
|
||||
for (; __r != __end; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
|
||||
#endif
|
||||
|
||||
return split_pos;
|
||||
return __split_pos;
|
||||
}
|
||||
|
||||
/** @brief Quicksort conquer step.
|
||||
* @param tls Array of thread-local storages.
|
||||
* @param begin Begin iterator of subsequence.
|
||||
* @param end End iterator of subsequence.
|
||||
* @param comp Comparator.
|
||||
* @param iam Number of the thread processing this function.
|
||||
* @param num_threads
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
* @param __num_threads
|
||||
* Number of threads that are allowed to work on this part. */
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
|
||||
RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
thread_index_t iam, thread_index_t num_threads,
|
||||
bool parent_wait)
|
||||
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __iam, _ThreadIndex __num_threads,
|
||||
bool __parent_wait)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (num_threads <= 1 || n <= 1)
|
||||
if (__num_threads <= 1 || __n <= 1)
|
||||
{
|
||||
tls[iam]->initial.first = begin;
|
||||
tls[iam]->initial.second = end;
|
||||
__tls[__iam]->_M_initial.first = __begin;
|
||||
__tls[__iam]->_M_initial.second = __end;
|
||||
|
||||
qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
|
||||
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Divide step.
|
||||
difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
|
||||
_DifferenceType __split_pos = __qsb_divide(__begin, __end, __comp, __num_threads);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
|
||||
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && __split_pos < (__end - __begin));
|
||||
#endif
|
||||
|
||||
thread_index_t num_threads_leftside =
|
||||
std::max<thread_index_t>(1, std::min<thread_index_t>(
|
||||
num_threads - 1, split_pos * num_threads / n));
|
||||
_ThreadIndex __num_threads_leftside =
|
||||
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
|
||||
__num_threads - 1, __split_pos * __num_threads / __n));
|
||||
|
||||
# pragma omp atomic
|
||||
*tls[iam]->elements_leftover -= (difference_type)1;
|
||||
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
|
||||
|
||||
// Conquer step.
|
||||
# pragma omp parallel num_threads(2)
|
||||
{
|
||||
bool wait;
|
||||
bool __wait;
|
||||
if(omp_get_num_threads() < 2)
|
||||
wait = false;
|
||||
__wait = false;
|
||||
else
|
||||
wait = parent_wait;
|
||||
__wait = __parent_wait;
|
||||
|
||||
# pragma omp sections
|
||||
{
|
||||
# pragma omp section
|
||||
{
|
||||
qsb_conquer(tls, begin, begin + split_pos, comp,
|
||||
iam,
|
||||
num_threads_leftside,
|
||||
wait);
|
||||
wait = parent_wait;
|
||||
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
|
||||
__iam,
|
||||
__num_threads_leftside,
|
||||
__wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
// The pivot_pos is left in place, to ensure termination.
|
||||
# pragma omp section
|
||||
{
|
||||
qsb_conquer(tls, begin + split_pos + 1, end, comp,
|
||||
iam + num_threads_leftside,
|
||||
num_threads - num_threads_leftside,
|
||||
wait);
|
||||
wait = parent_wait;
|
||||
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
|
||||
__iam + __num_threads_leftside,
|
||||
__num_threads - __num_threads_leftside,
|
||||
__wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -230,175 +230,175 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
|
||||
/**
|
||||
* @brief Quicksort step doing load-balanced local sort.
|
||||
* @param tls Array of thread-local storages.
|
||||
* @param comp Comparator.
|
||||
* @param iam Number of the thread processing this function.
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
|
||||
Comparator& comp, int iam, bool wait)
|
||||
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_Compare& __comp, int __iam, bool __wait)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
|
||||
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
|
||||
|
||||
difference_type base_case_n =
|
||||
_DifferenceType __base_case_n =
|
||||
_Settings::get().sort_qsb_base_case_maximal_n;
|
||||
if (base_case_n < 2)
|
||||
base_case_n = 2;
|
||||
thread_index_t num_threads = tl.num_threads;
|
||||
if (__base_case_n < 2)
|
||||
__base_case_n = 2;
|
||||
_ThreadIndex __num_threads = __tl.__num_threads;
|
||||
|
||||
// Every thread has its own random number generator.
|
||||
random_number rng(iam + 1);
|
||||
_RandomNumber __rng(__iam + 1);
|
||||
|
||||
Piece current = tl.initial;
|
||||
_Piece __current = __tl._M_initial;
|
||||
|
||||
difference_type elements_done = 0;
|
||||
_DifferenceType __elements_done = 0;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
difference_type total_elements_done = 0;
|
||||
_DifferenceType __total_elements_done = 0;
|
||||
#endif
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// Invariant: current must be a valid (maybe empty) range.
|
||||
RandomAccessIterator begin = current.first, end = current.second;
|
||||
difference_type n = end - begin;
|
||||
// Invariant: __current must be a valid (maybe empty) range.
|
||||
_RAIter __begin = __current.first, __end = __current.second;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (n > base_case_n)
|
||||
if (__n > __base_case_n)
|
||||
{
|
||||
// Divide.
|
||||
RandomAccessIterator pivot_pos = begin + rng(n);
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
|
||||
// Swap pivot_pos value to end.
|
||||
if (pivot_pos != (end - 1))
|
||||
std::swap(*pivot_pos, *(end - 1));
|
||||
pivot_pos = end - 1;
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
__gnu_parallel::binder2nd
|
||||
<Comparator, value_type, value_type, bool>
|
||||
pred(comp, *pivot_pos);
|
||||
<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
RandomAccessIterator split_pos1, split_pos2;
|
||||
split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1, __pred);
|
||||
|
||||
// Left side: < pivot_pos; right side: >= pivot_pos.
|
||||
// Left side: < __pivot_pos; right side: >= __pivot_pos.
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 && __split_pos1 < __end);
|
||||
#endif
|
||||
// Swap pivot back to middle.
|
||||
if (split_pos1 != pivot_pos)
|
||||
std::swap(*split_pos1, *pivot_pos);
|
||||
pivot_pos = split_pos1;
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, split_pos1 == 0.
|
||||
if ((split_pos1 + 1 - begin) < (n >> 7)
|
||||
|| (end - split_pos1) < (n >> 7))
|
||||
// In case all elements are equal, __split_pos1 == 0.
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::unary_negate<__gnu_parallel::binder1st
|
||||
<Comparator, value_type, value_type, bool>, value_type>
|
||||
pred(__gnu_parallel::binder1st
|
||||
<Comparator, value_type, value_type, bool>(comp,
|
||||
*pivot_pos));
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>(__comp,
|
||||
*__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
|
||||
end, pred);
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
split_pos2 = split_pos1 + 1;
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Elements equal to pivot are done.
|
||||
elements_done += (split_pos2 - split_pos1);
|
||||
__elements_done += (__split_pos2 - __split_pos1);
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
total_elements_done += (split_pos2 - split_pos1);
|
||||
__total_elements_done += (__split_pos2 - __split_pos1);
|
||||
#endif
|
||||
// Always push larger part onto stack.
|
||||
if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
|
||||
if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
|
||||
{
|
||||
// Right side larger.
|
||||
if ((split_pos2) != end)
|
||||
tl.leftover_parts.push_front(std::make_pair(split_pos2,
|
||||
end));
|
||||
if ((__split_pos2) != __end)
|
||||
__tl._M_leftover_parts.push_front(std::make_pair(__split_pos2,
|
||||
__end));
|
||||
|
||||
//current.first = begin; //already set anyway
|
||||
current.second = split_pos1;
|
||||
//__current.first = __begin; //already set anyway
|
||||
__current.second = __split_pos1;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Left side larger.
|
||||
if (begin != split_pos1)
|
||||
tl.leftover_parts.push_front(std::make_pair(begin,
|
||||
split_pos1));
|
||||
if (__begin != __split_pos1)
|
||||
__tl._M_leftover_parts.push_front(std::make_pair(__begin,
|
||||
__split_pos1));
|
||||
|
||||
current.first = split_pos2;
|
||||
//current.second = end; //already set anyway
|
||||
__current.first = __split_pos2;
|
||||
//__current.second = __end; //already set anyway
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
elements_done += n;
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
__elements_done += __n;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
total_elements_done += n;
|
||||
__total_elements_done += __n;
|
||||
#endif
|
||||
|
||||
// Prefer own stack, small pieces.
|
||||
if (tl.leftover_parts.pop_front(current))
|
||||
if (__tl._M_leftover_parts.pop_front(__current))
|
||||
continue;
|
||||
|
||||
# pragma omp atomic
|
||||
*tl.elements_leftover -= elements_done;
|
||||
*__tl._M_elements_leftover -= __elements_done;
|
||||
|
||||
elements_done = 0;
|
||||
__elements_done = 0;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
double search_start = omp_get_wtime();
|
||||
double __search_start = omp_get_wtime();
|
||||
#endif
|
||||
|
||||
// Look for new work.
|
||||
bool successfully_stolen = false;
|
||||
while (wait && *tl.elements_leftover > 0 && !successfully_stolen
|
||||
bool __successfully_stolen = false;
|
||||
while (__wait && *__tl._M_elements_leftover > 0 && !__successfully_stolen
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// Possible dead-lock.
|
||||
&& (omp_get_wtime() < (search_start + 1.0))
|
||||
&& (omp_get_wtime() < (__search_start + 1.0))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
thread_index_t victim;
|
||||
victim = rng(num_threads);
|
||||
_ThreadIndex __victim;
|
||||
__victim = __rng(__num_threads);
|
||||
|
||||
// Large pieces.
|
||||
successfully_stolen = (victim != iam)
|
||||
&& tls[victim]->leftover_parts.pop_back(current);
|
||||
if (!successfully_stolen)
|
||||
yield();
|
||||
__successfully_stolen = (__victim != __iam)
|
||||
&& __tls[__victim]->_M_leftover_parts.pop_back(__current);
|
||||
if (!__successfully_stolen)
|
||||
__yield();
|
||||
#if !defined(__ICC) && !defined(__ECC)
|
||||
# pragma omp flush
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
if (omp_get_wtime() >= (search_start + 1.0))
|
||||
if (omp_get_wtime() >= (__search_start + 1.0))
|
||||
{
|
||||
sleep(1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
|
||||
< (search_start + 1.0));
|
||||
< (__search_start + 1.0));
|
||||
}
|
||||
#endif
|
||||
if (!successfully_stolen)
|
||||
if (!__successfully_stolen)
|
||||
{
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
|
@ -407,70 +407,70 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
}
|
||||
|
||||
/** @brief Top-level quicksort routine.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param comp Comparator.
|
||||
* @param num_threads Number of threads that are allowed to work on
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
thread_index_t num_threads)
|
||||
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
typedef QSBThreadLocal<RandomAccessIterator> tls_type;
|
||||
typedef _QSBThreadLocal<_RAIter> _TLSType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (n <= 1)
|
||||
if (__n <= 1)
|
||||
return;
|
||||
|
||||
// At least one element per processor.
|
||||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
// Initialize thread local storage
|
||||
tls_type** tls = new tls_type*[num_threads];
|
||||
difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
|
||||
for (thread_index_t t = 0; t < num_threads; ++t)
|
||||
tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
|
||||
_TLSType** __tls = new _TLSType*[__num_threads];
|
||||
_DifferenceType __queue_size = __num_threads * (_ThreadIndex)(log2(__n) + 1);
|
||||
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
|
||||
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
|
||||
|
||||
// There can never be more than ceil(log2(n)) ranges on the stack, because
|
||||
// There can never be more than ceil(log2(__n)) ranges on the stack, because
|
||||
// 1. Only one processor pushes onto the stack
|
||||
// 2. The largest range has at most length n
|
||||
// 2. The largest range has at most length __n
|
||||
// 3. Each range is larger than half of the range remaining
|
||||
volatile difference_type elements_leftover = n;
|
||||
for (int i = 0; i < num_threads; ++i)
|
||||
volatile _DifferenceType _M_elements_leftover = __n;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
tls[i]->elements_leftover = &elements_leftover;
|
||||
tls[i]->num_threads = num_threads;
|
||||
tls[i]->global = std::make_pair(begin, end);
|
||||
__tls[__i]->_M_elements_leftover = &_M_elements_leftover;
|
||||
__tls[__i]->__num_threads = __num_threads;
|
||||
__tls[__i]->_M_global = std::make_pair(__begin, __end);
|
||||
|
||||
// Just in case nothing is left to assign.
|
||||
tls[i]->initial = std::make_pair(end, end);
|
||||
__tls[__i]->_M_initial = std::make_pair(__end, __end);
|
||||
}
|
||||
|
||||
// Main recursion call.
|
||||
qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
|
||||
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// All stacks must be empty.
|
||||
Piece dummy;
|
||||
for (int i = 1; i < num_threads; ++i)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
|
||||
_Piece __dummy;
|
||||
for (int __i = 1; __i < __num_threads; ++__i)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < num_threads; ++i)
|
||||
delete tls[i];
|
||||
delete[] tls;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
delete __tls[__i];
|
||||
delete[] __tls;
|
||||
}
|
||||
} // namespace __gnu_parallel
|
||||
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ namespace __gnu_parallel
|
|||
// and active, which implies that the OpenMP runtime is actually
|
||||
// going to be linked in.
|
||||
inline int
|
||||
get_max_threads()
|
||||
__get_max_threads()
|
||||
{
|
||||
int __i = omp_get_max_threads();
|
||||
return __i > 1 ? __i : 1;
|
||||
|
|
@ -90,91 +90,91 @@ namespace __gnu_parallel
|
|||
|
||||
|
||||
inline bool
|
||||
is_parallel(const _Parallelism __p) { return __p != sequential; }
|
||||
__is_parallel(const _Parallelism __p) { return __p != sequential; }
|
||||
|
||||
|
||||
// XXX remove std::duplicates from here if possible,
|
||||
// XXX but keep minimal dependencies.
|
||||
|
||||
/** @brief Calculates the rounded-down logarithm of @c n for base 2.
|
||||
* @param n Argument.
|
||||
/** @brief Calculates the rounded-down logarithm of @c __n for base 2.
|
||||
* @param __n Argument.
|
||||
* @return Returns 0 for any argument <1.
|
||||
*/
|
||||
template<typename Size>
|
||||
inline Size
|
||||
__log2(Size n)
|
||||
template<typename _Size>
|
||||
inline _Size
|
||||
__log2(_Size __n)
|
||||
{
|
||||
Size k;
|
||||
for (k = 0; n > 1; n >>= 1)
|
||||
++k;
|
||||
return k;
|
||||
_Size __k;
|
||||
for (__k = 0; __n > 1; __n >>= 1)
|
||||
++__k;
|
||||
return __k;
|
||||
}
|
||||
|
||||
/** @brief Encode two integers into one __gnu_parallel::lcas_t.
|
||||
* @param a First integer, to be encoded in the most-significant @c
|
||||
* lcas_t_bits/2 bits.
|
||||
* @param b Second integer, to be encoded in the least-significant
|
||||
* @c lcas_t_bits/2 bits.
|
||||
* @return __gnu_parallel::lcas_t value encoding @c a and @c b.
|
||||
/** @brief Encode two integers into one __gnu_parallel::_CASable.
|
||||
* @param __a First integer, to be encoded in the most-significant @c
|
||||
* _CASable_bits/2 bits.
|
||||
* @param __b Second integer, to be encoded in the least-significant
|
||||
* @c _CASable_bits/2 bits.
|
||||
* @return __gnu_parallel::_CASable value encoding @c __a and @c __b.
|
||||
* @see decode2
|
||||
*/
|
||||
inline lcas_t
|
||||
encode2(int a, int b) //must all be non-negative, actually
|
||||
inline _CASable
|
||||
__encode2(int __a, int __b) //must all be non-negative, actually
|
||||
{
|
||||
return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0);
|
||||
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
|
||||
}
|
||||
|
||||
/** @brief Decode two integers from one __gnu_parallel::lcas_t.
|
||||
* @param x __gnu_parallel::lcas_t to decode integers from.
|
||||
* @param a First integer, to be decoded from the most-significant
|
||||
* @c lcas_t_bits/2 bits of @c x.
|
||||
* @param b Second integer, to be encoded in the least-significant
|
||||
* @c lcas_t_bits/2 bits of @c x.
|
||||
* @see encode2
|
||||
/** @brief Decode two integers from one __gnu_parallel::_CASable.
|
||||
* @param __x __gnu_parallel::_CASable to decode integers from.
|
||||
* @param __a First integer, to be decoded from the most-significant
|
||||
* @c _CASable_bits/2 bits of @c __x.
|
||||
* @param __b Second integer, to be decoded from the least-significant
|
||||
* @c _CASable_bits/2 bits of @c __x.
|
||||
* @see __encode2
|
||||
*/
|
||||
inline void
|
||||
decode2(lcas_t x, int& a, int& b)
|
||||
decode2(_CASable __x, int& __a, int& __b)
|
||||
{
|
||||
a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask);
|
||||
b = (int)((x >> 0 ) & lcas_t_mask);
|
||||
__a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask);
|
||||
__b = (int)((__x >> 0 ) & _CASable_mask);
|
||||
}
|
||||
|
||||
/** @brief Equivalent to std::min. */
|
||||
template<typename T>
|
||||
const T&
|
||||
min(const T& a, const T& b)
|
||||
{ return (a < b) ? a : b; }
|
||||
template<typename _Tp>
|
||||
const _Tp&
|
||||
min(const _Tp& __a, const _Tp& __b)
|
||||
{ return (__a < __b) ? __a : __b; }
|
||||
|
||||
/** @brief Equivalent to std::max. */
|
||||
template<typename T>
|
||||
const T&
|
||||
max(const T& a, const T& b)
|
||||
{ return (a > b) ? a : b; }
|
||||
template<typename _Tp>
|
||||
const _Tp&
|
||||
max(const _Tp& __a, const _Tp& __b)
|
||||
{ return (__a > __b) ? __a : __b; }
|
||||
|
||||
/** @brief Constructs predicate for equality from strict weak
|
||||
* ordering predicate
|
||||
*/
|
||||
// XXX comparator at the end, as per others
|
||||
template<typename Comparator, typename T1, typename T2>
|
||||
class equal_from_less : public std::binary_function<T1, T2, bool>
|
||||
template<typename _Compare, typename _T1, typename _T2>
|
||||
class _EqualFromLess : public std::binary_function<_T1, _T2, bool>
|
||||
{
|
||||
private:
|
||||
Comparator& comp;
|
||||
_Compare& __comp;
|
||||
|
||||
public:
|
||||
equal_from_less(Comparator& _comp) : comp(_comp) { }
|
||||
_EqualFromLess(_Compare& _comp) : __comp(_comp) { }
|
||||
|
||||
bool operator()(const T1& a, const T2& b)
|
||||
bool operator()(const _T1& __a, const _T2& __b)
|
||||
{
|
||||
return !comp(a, b) && !comp(b, a);
|
||||
return !__comp(__a, __b) && !__comp(__b, __a);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/** @brief Similar to std::binder1st,
|
||||
/** @brief Similar to std::unary_negate,
|
||||
* but giving the argument types explicitly. */
|
||||
template<typename _Predicate, typename argument_type>
|
||||
class unary_negate
|
||||
class __unary_negate
|
||||
: public std::unary_function<argument_type, bool>
|
||||
{
|
||||
protected:
|
||||
|
|
@ -182,93 +182,93 @@ template<typename _Predicate, typename argument_type>
|
|||
|
||||
public:
|
||||
explicit
|
||||
unary_negate(const _Predicate& __x) : _M_pred(__x) { }
|
||||
__unary_negate(const _Predicate& __x) : _M_pred(__x) { }
|
||||
|
||||
bool
|
||||
operator()(const argument_type& __x)
|
||||
{ return !_M_pred(__x); }
|
||||
};
|
||||
|
||||
/** @brief Similar to std::binder1st,
|
||||
/** @brief Similar to std::binder1st,
|
||||
* but giving the argument types explicitly. */
|
||||
template<typename _Operation, typename first_argument_type,
|
||||
typename second_argument_type, typename result_type>
|
||||
class binder1st
|
||||
: public std::unary_function<second_argument_type, result_type>
|
||||
template<typename _Operation, typename _FirstArgumentType,
|
||||
typename _SecondArgumentType, typename _ResultType>
|
||||
class __binder1st
|
||||
: public std::unary_function<_SecondArgumentType, _ResultType>
|
||||
{
|
||||
protected:
|
||||
_Operation op;
|
||||
first_argument_type value;
|
||||
_Operation _M_op;
|
||||
_FirstArgumentType _M_value;
|
||||
|
||||
public:
|
||||
binder1st(const _Operation& __x,
|
||||
const first_argument_type& __y)
|
||||
: op(__x), value(__y) { }
|
||||
__binder1st(const _Operation& __x,
|
||||
const _FirstArgumentType& __y)
|
||||
: _M_op(__x), _M_value(__y) { }
|
||||
|
||||
result_type
|
||||
operator()(const second_argument_type& __x)
|
||||
{ return op(value, __x); }
|
||||
_ResultType
|
||||
operator()(const _SecondArgumentType& __x)
|
||||
{ return _M_op(_M_value, __x); }
|
||||
|
||||
// _GLIBCXX_RESOLVE_LIB_DEFECTS
|
||||
// 109. Missing binders for non-const sequence elements
|
||||
result_type
|
||||
operator()(second_argument_type& __x) const
|
||||
{ return op(value, __x); }
|
||||
// 109. Missing binders for non-const sequence elements
|
||||
_ResultType
|
||||
operator()(_SecondArgumentType& __x) const
|
||||
{ return _M_op(_M_value, __x); }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Similar to std::binder2nd, but giving the argument types
|
||||
* explicitly.
|
||||
*/
|
||||
template<typename _Operation, typename first_argument_type,
|
||||
typename second_argument_type, typename result_type>
|
||||
template<typename _Operation, typename _FirstArgumentType,
|
||||
typename _SecondArgumentType, typename _ResultType>
|
||||
class binder2nd
|
||||
: public std::unary_function<first_argument_type, result_type>
|
||||
: public std::unary_function<_FirstArgumentType, _ResultType>
|
||||
{
|
||||
protected:
|
||||
_Operation op;
|
||||
second_argument_type value;
|
||||
_Operation _M_op;
|
||||
_SecondArgumentType _M_value;
|
||||
|
||||
public:
|
||||
binder2nd(const _Operation& __x,
|
||||
const second_argument_type& __y)
|
||||
: op(__x), value(__y) { }
|
||||
const _SecondArgumentType& __y)
|
||||
: _M_op(__x), _M_value(__y) { }
|
||||
|
||||
result_type
|
||||
operator()(const first_argument_type& __x) const
|
||||
{ return op(__x, value); }
|
||||
_ResultType
|
||||
operator()(const _FirstArgumentType& __x) const
|
||||
{ return _M_op(__x, _M_value); }
|
||||
|
||||
// _GLIBCXX_RESOLVE_LIB_DEFECTS
|
||||
// 109. Missing binders for non-const sequence elements
|
||||
result_type
|
||||
operator()(first_argument_type& __x)
|
||||
{ return op(__x, value); }
|
||||
// 109. Missing binders for non-const sequence elements
|
||||
_ResultType
|
||||
operator()(_FirstArgumentType& __x)
|
||||
{ return _M_op(__x, _M_value); }
|
||||
};
|
||||
|
||||
/** @brief Similar to std::equal_to, but allows two different types. */
|
||||
template<typename T1, typename T2>
|
||||
struct equal_to : std::binary_function<T1, T2, bool>
|
||||
template<typename _T1, typename _T2>
|
||||
struct equal_to : std::binary_function<_T1, _T2, bool>
|
||||
{
|
||||
bool operator()(const T1& t1, const T2& t2) const
|
||||
{ return t1 == t2; }
|
||||
bool operator()(const _T1& __t1, const _T2& __t2) const
|
||||
{ return __t1 == __t2; }
|
||||
};
|
||||
|
||||
/** @brief Similar to std::less, but allows two different types. */
|
||||
template<typename T1, typename T2>
|
||||
struct less : std::binary_function<T1, T2, bool>
|
||||
template<typename _T1, typename _T2>
|
||||
struct _Less : std::binary_function<_T1, _T2, bool>
|
||||
{
|
||||
bool
|
||||
operator()(const T1& t1, const T2& t2) const
|
||||
{ return t1 < t2; }
|
||||
operator()(const _T1& __t1, const _T2& __t2) const
|
||||
{ return __t1 < __t2; }
|
||||
|
||||
bool
|
||||
operator()(const T2& t2, const T1& t1) const
|
||||
{ return t2 < t1; }
|
||||
operator()(const _T2& __t2, const _T1& __t1) const
|
||||
{ return __t2 < __t1; }
|
||||
};
|
||||
|
||||
// Partial specialization for one type. Same as std::less.
|
||||
template<typename _Tp>
|
||||
struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
|
||||
struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
|
||||
{
|
||||
bool
|
||||
operator()(const _Tp& __x, const _Tp& __y) const
|
||||
|
|
@ -278,24 +278,24 @@ struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
|
|||
|
||||
/** @brief Similar to std::plus, but allows two different types. */
|
||||
template<typename _Tp1, typename _Tp2>
|
||||
struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
|
||||
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
|
||||
{
|
||||
typedef __typeof__(*static_cast<_Tp1*>(NULL)
|
||||
+ *static_cast<_Tp2*>(NULL)) result;
|
||||
+ *static_cast<_Tp2*>(NULL)) __result;
|
||||
|
||||
result
|
||||
__result
|
||||
operator()(const _Tp1& __x, const _Tp2& __y) const
|
||||
{ return __x + __y; }
|
||||
};
|
||||
|
||||
// Partial specialization for one type. Same as std::plus.
|
||||
template<typename _Tp>
|
||||
struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
|
||||
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
|
||||
{
|
||||
typedef __typeof__(*static_cast<_Tp*>(NULL)
|
||||
+ *static_cast<_Tp*>(NULL)) result;
|
||||
+ *static_cast<_Tp*>(NULL)) __result;
|
||||
|
||||
result
|
||||
__result
|
||||
operator()(const _Tp& __x, const _Tp& __y) const
|
||||
{ return __x + __y; }
|
||||
};
|
||||
|
|
@ -303,164 +303,164 @@ template<typename _Tp>
|
|||
|
||||
/** @brief Similar to std::multiplies, but allows two different types. */
|
||||
template<typename _Tp1, typename _Tp2>
|
||||
struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
|
||||
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
|
||||
{
|
||||
typedef __typeof__(*static_cast<_Tp1*>(NULL)
|
||||
* *static_cast<_Tp2*>(NULL)) result;
|
||||
* *static_cast<_Tp2*>(NULL)) __result;
|
||||
|
||||
result
|
||||
__result
|
||||
operator()(const _Tp1& __x, const _Tp2& __y) const
|
||||
{ return __x * __y; }
|
||||
};
|
||||
|
||||
// Partial specialization for one type. Same as std::multiplies.
|
||||
template<typename _Tp>
|
||||
struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
|
||||
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
|
||||
{
|
||||
typedef __typeof__(*static_cast<_Tp*>(NULL)
|
||||
* *static_cast<_Tp*>(NULL)) result;
|
||||
* *static_cast<_Tp*>(NULL)) __result;
|
||||
|
||||
result
|
||||
__result
|
||||
operator()(const _Tp& __x, const _Tp& __y) const
|
||||
{ return __x * __y; }
|
||||
};
|
||||
|
||||
|
||||
template<typename T, typename _DifferenceTp>
|
||||
class pseudo_sequence;
|
||||
template<typename _Tp, typename _DifferenceTp>
|
||||
class _PseudoSequence;
|
||||
|
||||
/** @brief Iterator associated with __gnu_parallel::pseudo_sequence.
|
||||
/** @brief Iterator associated with __gnu_parallel::_PseudoSequence.
|
||||
* It features the usual random-access iterator functionality.
|
||||
* @param T Sequence value type.
|
||||
* @param difference_type Sequence difference type.
|
||||
* @param _Tp Sequence value type.
|
||||
* @param _DifferenceTp Sequence difference type.
|
||||
*/
|
||||
template<typename T, typename _DifferenceTp>
|
||||
class pseudo_sequence_iterator
|
||||
template<typename _Tp, typename _DifferenceTp>
|
||||
class _PseudoSequenceIterator
|
||||
{
|
||||
public:
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
private:
|
||||
typedef pseudo_sequence_iterator<T, _DifferenceTp> type;
|
||||
typedef _PseudoSequenceIterator<_Tp, _DifferenceTp> _Self;
|
||||
|
||||
const T& val;
|
||||
difference_type pos;
|
||||
const _Tp& _M_val;
|
||||
_DifferenceType _M_pos;
|
||||
|
||||
public:
|
||||
pseudo_sequence_iterator(const T& val, difference_type pos)
|
||||
: val(val), pos(pos) { }
|
||||
_PseudoSequenceIterator(const _Tp& _M_val, _DifferenceType _M_pos)
|
||||
: _M_val(_M_val), _M_pos(_M_pos) { }
|
||||
|
||||
// Pre-increment operator.
|
||||
type&
|
||||
_Self&
|
||||
operator++()
|
||||
{
|
||||
++pos;
|
||||
++_M_pos;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Post-increment operator.
|
||||
const type
|
||||
const _Self
|
||||
operator++(int)
|
||||
{ return type(pos++); }
|
||||
{ return _Self(_M_pos++); }
|
||||
|
||||
const T&
|
||||
const _Tp&
|
||||
operator*() const
|
||||
{ return val; }
|
||||
{ return _M_val; }
|
||||
|
||||
const T&
|
||||
operator[](difference_type) const
|
||||
{ return val; }
|
||||
const _Tp&
|
||||
operator[](_DifferenceType) const
|
||||
{ return _M_val; }
|
||||
|
||||
bool
|
||||
operator==(const type& i2)
|
||||
{ return pos == i2.pos; }
|
||||
operator==(const _Self& __i2)
|
||||
{ return _M_pos == __i2._M_pos; }
|
||||
|
||||
difference_type
|
||||
operator!=(const type& i2)
|
||||
{ return pos != i2.pos; }
|
||||
_DifferenceType
|
||||
operator!=(const _Self& __i2)
|
||||
{ return _M_pos != __i2._M_pos; }
|
||||
|
||||
difference_type
|
||||
operator-(const type& i2)
|
||||
{ return pos - i2.pos; }
|
||||
_DifferenceType
|
||||
operator-(const _Self& __i2)
|
||||
{ return _M_pos - __i2._M_pos; }
|
||||
};
|
||||
|
||||
/** @brief Sequence that conceptually consists of multiple copies of
|
||||
the same element.
|
||||
* The copies are not stored explicitly, of course.
|
||||
* @param T Sequence value type.
|
||||
* @param difference_type Sequence difference type.
|
||||
* @param _Tp Sequence value type.
|
||||
* @param _DifferenceTp Sequence difference type.
|
||||
*/
|
||||
template<typename T, typename _DifferenceTp>
|
||||
class pseudo_sequence
|
||||
template<typename _Tp, typename _DifferenceTp>
|
||||
class _PseudoSequence
|
||||
{
|
||||
typedef pseudo_sequence<T, _DifferenceTp> type;
|
||||
typedef _PseudoSequence<_Tp, _DifferenceTp> _Self;
|
||||
|
||||
public:
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
// Better cast down to uint64 than up to _DifferenceTp.
|
||||
typedef pseudo_sequence_iterator<T, uint64> iterator;
|
||||
typedef _PseudoSequenceIterator<_Tp, uint64> iterator;
|
||||
|
||||
/** @brief Constructor.
|
||||
* @param val Element of the sequence.
|
||||
* @param count Number of (virtual) copies.
|
||||
* @param _M_val Element of the sequence.
|
||||
* @param __count Number of (virtual) copies.
|
||||
*/
|
||||
pseudo_sequence(const T& val, difference_type count)
|
||||
: val(val), count(count) { }
|
||||
_PseudoSequence(const _Tp& _M_val, _DifferenceType __count)
|
||||
: _M_val(_M_val), __count(__count) { }
|
||||
|
||||
/** @brief Begin iterator. */
|
||||
iterator
|
||||
begin() const
|
||||
{ return iterator(val, 0); }
|
||||
{ return iterator(_M_val, 0); }
|
||||
|
||||
/** @brief End iterator. */
|
||||
iterator
|
||||
end() const
|
||||
{ return iterator(val, count); }
|
||||
{ return iterator(_M_val, __count); }
|
||||
|
||||
private:
|
||||
const T& val;
|
||||
difference_type count;
|
||||
const _Tp& _M_val;
|
||||
_DifferenceType __count;
|
||||
};
|
||||
|
||||
/** @brief Functor that does nothing */
|
||||
template<typename _ValueTp>
|
||||
class void_functor
|
||||
class _VoidFunctor
|
||||
{
|
||||
inline void
|
||||
operator()(const _ValueTp& v) const { }
|
||||
operator()(const _ValueTp& __v) const { }
|
||||
};
|
||||
|
||||
/** @brief Compute the median of three referenced elements,
|
||||
according to @c comp.
|
||||
* @param a First iterator.
|
||||
* @param b Second iterator.
|
||||
* @param c Third iterator.
|
||||
* @param comp Comparator.
|
||||
according to @c __comp.
|
||||
* @param __a First iterator.
|
||||
* @param __b Second iterator.
|
||||
* @param __c Third iterator.
|
||||
* @param __comp Comparator.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
RandomAccessIterator
|
||||
median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
|
||||
RandomAccessIterator c, Comparator& comp)
|
||||
template<typename _RAIter, typename _Compare>
|
||||
_RAIter
|
||||
__median_of_three_iterators(_RAIter __a, _RAIter __b,
|
||||
_RAIter __c, _Compare& __comp)
|
||||
{
|
||||
if (comp(*a, *b))
|
||||
if (comp(*b, *c))
|
||||
return b;
|
||||
if (__comp(*__a, *__b))
|
||||
if (__comp(*__b, *__c))
|
||||
return __b;
|
||||
else
|
||||
if (comp(*a, *c))
|
||||
return c;
|
||||
if (__comp(*__a, *__c))
|
||||
return __c;
|
||||
else
|
||||
return a;
|
||||
return __a;
|
||||
else
|
||||
{
|
||||
// Just swap a and b.
|
||||
if (comp(*a, *c))
|
||||
return a;
|
||||
// Just swap __a and __b.
|
||||
if (__comp(*__a, *__c))
|
||||
return __a;
|
||||
else
|
||||
if (comp(*b, *c))
|
||||
return c;
|
||||
if (__comp(*__b, *__c))
|
||||
return __c;
|
||||
else
|
||||
return b;
|
||||
return __b;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,115 +39,115 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/**
|
||||
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
* @brief Check whether @c [__begin, @c __end) is sorted according to @c __comp.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
*/
|
||||
// XXX Comparator default template argument
|
||||
template<typename InputIterator, typename Comparator>
|
||||
// XXX Compare default template argument
|
||||
template<typename _IIter, typename _Compare>
|
||||
bool
|
||||
is_sorted(InputIterator begin, InputIterator end,
|
||||
Comparator comp
|
||||
= std::less<typename std::iterator_traits<InputIterator>::
|
||||
value_type>())
|
||||
__is_sorted(_IIter __begin, _IIter __end,
|
||||
_Compare __comp
|
||||
= std::less<typename std::iterator_traits<_IIter>::
|
||||
_ValueType>())
|
||||
{
|
||||
if (begin == end)
|
||||
if (__begin == __end)
|
||||
return true;
|
||||
|
||||
InputIterator current(begin), recent(begin);
|
||||
_IIter __current(__begin), __recent(__begin);
|
||||
|
||||
unsigned long long position = 1;
|
||||
for (current++; current != end; current++)
|
||||
unsigned long long __position = 1;
|
||||
for (__current++; __current != __end; __current++)
|
||||
{
|
||||
if (comp(*current, *recent))
|
||||
if (__comp(*__current, *__recent))
|
||||
{
|
||||
printf("is_sorted: check failed before position %i.\n",
|
||||
position);
|
||||
printf("__is_sorted: check failed before position %__i.\n",
|
||||
__position);
|
||||
return false;
|
||||
}
|
||||
recent = current;
|
||||
position++;
|
||||
__recent = __current;
|
||||
__position++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
|
||||
* @brief Check whether @c [__begin, @c __end) is sorted according to @c __comp.
|
||||
* Prints the position in case an unordered pair is found.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param first_failure The first failure is returned in this variable.
|
||||
* @param comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __first_failure The first failure is returned in this variable.
|
||||
* @param __comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
*/
|
||||
// XXX Comparator default template argument
|
||||
template<typename InputIterator, typename Comparator>
|
||||
// XXX Compare default template argument
|
||||
template<typename _IIter, typename _Compare>
|
||||
bool
|
||||
is_sorted_failure(InputIterator begin, InputIterator end,
|
||||
InputIterator& first_failure,
|
||||
Comparator comp
|
||||
= std::less<typename std::iterator_traits<InputIterator>::
|
||||
value_type>())
|
||||
is_sorted_failure(_IIter __begin, _IIter __end,
|
||||
_IIter& __first_failure,
|
||||
_Compare __comp
|
||||
= std::less<typename std::iterator_traits<_IIter>::
|
||||
_ValueType>())
|
||||
{
|
||||
if (begin == end)
|
||||
if (__begin == __end)
|
||||
return true;
|
||||
|
||||
InputIterator current(begin), recent(begin);
|
||||
_IIter __current(__begin), __recent(__begin);
|
||||
|
||||
unsigned long long position = 1;
|
||||
for (current++; current != end; current++)
|
||||
unsigned long long __position = 1;
|
||||
for (__current++; __current != __end; __current++)
|
||||
{
|
||||
if (comp(*current, *recent))
|
||||
if (__comp(*__current, *__recent))
|
||||
{
|
||||
first_failure = current;
|
||||
printf("is_sorted: check failed before position %lld.\n",
|
||||
position);
|
||||
__first_failure = __current;
|
||||
printf("__is_sorted: check failed before position %lld.\n",
|
||||
__position);
|
||||
return false;
|
||||
}
|
||||
recent = current;
|
||||
position++;
|
||||
__recent = __current;
|
||||
__position++;
|
||||
}
|
||||
|
||||
first_failure = end;
|
||||
__first_failure = __end;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
|
||||
* @brief Check whether @c [__begin, @c __end) is sorted according to @c __comp.
|
||||
* Prints all unordered pairs, including the surrounding two elements.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @return @c true if sorted, @c false otherwise.
|
||||
*/
|
||||
template<typename InputIterator, typename Comparator>
|
||||
template<typename _IIter, typename _Compare>
|
||||
bool
|
||||
// XXX Comparator default template argument
|
||||
is_sorted_print_failures(InputIterator begin, InputIterator end,
|
||||
Comparator comp
|
||||
// XXX Compare default template argument
|
||||
is_sorted_print_failures(_IIter __begin, _IIter __end,
|
||||
_Compare __comp
|
||||
= std::less<typename std::iterator_traits
|
||||
<InputIterator>::value_type>())
|
||||
<_IIter>::value_type>())
|
||||
{
|
||||
if (begin == end)
|
||||
if (__begin == __end)
|
||||
return true;
|
||||
|
||||
InputIterator recent(begin);
|
||||
bool ok = true;
|
||||
_IIter __recent(__begin);
|
||||
bool __ok = true;
|
||||
|
||||
for (InputIterator pos(begin + 1); pos != end; pos++)
|
||||
for (_IIter __pos(__begin + 1); __pos != __end; __pos++)
|
||||
{
|
||||
if (comp(*pos, *recent))
|
||||
if (__comp(*__pos, *__recent))
|
||||
{
|
||||
printf("%ld: %d %d %d %d\n", pos - begin, *(pos - 2),
|
||||
*(pos- 1), *pos, *(pos + 1));
|
||||
ok = false;
|
||||
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
|
||||
*(__pos- 1), *__pos, *(__pos + 1));
|
||||
__ok = false;
|
||||
}
|
||||
recent = pos;
|
||||
__recent = __pos;
|
||||
}
|
||||
return ok;
|
||||
return __ok;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,24 +61,24 @@ __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
#if defined(__ICC)
|
||||
template<typename must_be_int = int>
|
||||
int32 faa32(int32* x, int32 inc)
|
||||
template<typename _MustBeInt = int>
|
||||
int32 __faa32(int32* __x, int32 __inc)
|
||||
{
|
||||
asm volatile("lock xadd %0,%1"
|
||||
: "=r" (inc), "=m" (*x)
|
||||
: "0" (inc)
|
||||
: "=__r" (__inc), "=__m" (*__x)
|
||||
: "0" (__inc)
|
||||
: "memory");
|
||||
return inc;
|
||||
return __inc;
|
||||
}
|
||||
#if defined(__x86_64)
|
||||
template<typename must_be_int = int>
|
||||
int64 faa64(int64* x, int64 inc)
|
||||
template<typename _MustBeInt = int>
|
||||
int64 __faa64(int64* __x, int64 __inc)
|
||||
{
|
||||
asm volatile("lock xadd %0,%1"
|
||||
: "=r" (inc), "=m" (*x)
|
||||
: "0" (inc)
|
||||
: "=__r" (__inc), "=__m" (*__x)
|
||||
: "0" (__inc)
|
||||
: "memory");
|
||||
return inc;
|
||||
return __inc;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -88,106 +88,106 @@ namespace __gnu_parallel
|
|||
/** @brief Add a value to a variable, atomically.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to a 32-bit signed integer.
|
||||
* @param addend Value to add.
|
||||
* @param __ptr Pointer to a 32-bit signed integer.
|
||||
* @param __addend Value to add.
|
||||
*/
|
||||
inline int32
|
||||
fetch_and_add_32(volatile int32* ptr, int32 addend)
|
||||
__fetch_and_add_32(volatile int32* __ptr, int32 __addend)
|
||||
{
|
||||
#if defined(__ICC) //x86 version
|
||||
return _InterlockedExchangeAdd((void*)ptr, addend);
|
||||
return _InterlockedExchangeAdd((void*)__ptr, __addend);
|
||||
#elif defined(__ECC) //IA-64 version
|
||||
return _InterlockedExchangeAdd((void*)ptr, addend);
|
||||
return _InterlockedExchangeAdd((void*)__ptr, __addend);
|
||||
#elif defined(__ICL) || defined(_MSC_VER)
|
||||
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
|
||||
addend);
|
||||
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
|
||||
__addend);
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_fetch_and_add(ptr, addend);
|
||||
return __sync_fetch_and_add(__ptr, __addend);
|
||||
#elif defined(__SUNPRO_CC) && defined(__sparc)
|
||||
volatile int32 before, after;
|
||||
volatile int32 __before, __after;
|
||||
do
|
||||
{
|
||||
before = *ptr;
|
||||
after = before + addend;
|
||||
} while (atomic_cas_32((volatile unsigned int*)ptr, before,
|
||||
after) != before);
|
||||
return before;
|
||||
__before = *__ptr;
|
||||
__after = __before + __addend;
|
||||
} while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
|
||||
__after) != __before);
|
||||
return __before;
|
||||
#else //fallback, slow
|
||||
#pragma message("slow fetch_and_add_32")
|
||||
int32 res;
|
||||
#pragma message("slow __fetch_and_add_32")
|
||||
int32 __res;
|
||||
#pragma omp critical
|
||||
{
|
||||
res = *ptr;
|
||||
*(ptr) += addend;
|
||||
__res = *__ptr;
|
||||
*(__ptr) += __addend;
|
||||
}
|
||||
return res;
|
||||
return __res;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Add a value to a variable, atomically.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to a 64-bit signed integer.
|
||||
* @param addend Value to add.
|
||||
* @param __ptr Pointer to a 64-bit signed integer.
|
||||
* @param __addend Value to add.
|
||||
*/
|
||||
inline int64
|
||||
fetch_and_add_64(volatile int64* ptr, int64 addend)
|
||||
__fetch_and_add_64(volatile int64* __ptr, int64 __addend)
|
||||
{
|
||||
#if defined(__ICC) && defined(__x86_64) //x86 version
|
||||
return faa64<int>((int64*)ptr, addend);
|
||||
return __faa64<int>((int64*)__ptr, __addend);
|
||||
#elif defined(__ECC) //IA-64 version
|
||||
return _InterlockedExchangeAdd64((void*)ptr, addend);
|
||||
return _InterlockedExchangeAdd64((void*)__ptr, __addend);
|
||||
#elif defined(__ICL) || defined(_MSC_VER)
|
||||
#ifndef _WIN64
|
||||
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
|
||||
return 0;
|
||||
#else
|
||||
return _InterlockedExchangeAdd64(ptr, addend);
|
||||
return _InterlockedExchangeAdd64(__ptr, __addend);
|
||||
#endif
|
||||
#elif defined(__GNUC__) && defined(__x86_64)
|
||||
return __sync_fetch_and_add(ptr, addend);
|
||||
return __sync_fetch_and_add(__ptr, __addend);
|
||||
#elif defined(__GNUC__) && defined(__i386) && \
|
||||
(defined(__i686) || defined(__pentium4) || defined(__athlon))
|
||||
return __sync_fetch_and_add(ptr, addend);
|
||||
return __sync_fetch_and_add(__ptr, __addend);
|
||||
#elif defined(__SUNPRO_CC) && defined(__sparc)
|
||||
volatile int64 before, after;
|
||||
volatile int64 __before, __after;
|
||||
do
|
||||
{
|
||||
before = *ptr;
|
||||
after = before + addend;
|
||||
} while (atomic_cas_64((volatile unsigned long long*)ptr, before,
|
||||
after) != before);
|
||||
return before;
|
||||
__before = *__ptr;
|
||||
__after = __before + __addend;
|
||||
} while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
|
||||
__after) != __before);
|
||||
return __before;
|
||||
#else //fallback, slow
|
||||
#if defined(__GNUC__) && defined(__i386)
|
||||
// XXX doesn't work with -march=native
|
||||
// XXX doesn't work with -march=native
|
||||
//#warning "please compile with -march=i686 or better"
|
||||
#endif
|
||||
#pragma message("slow fetch_and_add_64")
|
||||
int64 res;
|
||||
#pragma message("slow __fetch_and_add_64")
|
||||
int64 __res;
|
||||
#pragma omp critical
|
||||
{
|
||||
res = *ptr;
|
||||
*(ptr) += addend;
|
||||
__res = *__ptr;
|
||||
*(__ptr) += __addend;
|
||||
}
|
||||
return res;
|
||||
return __res;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Add a value to a variable, atomically.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to a signed integer.
|
||||
* @param addend Value to add.
|
||||
* @param __ptr Pointer to a signed integer.
|
||||
* @param __addend Value to add.
|
||||
*/
|
||||
template<typename T>
|
||||
inline T
|
||||
fetch_and_add(volatile T* ptr, T addend)
|
||||
template<typename _Tp>
|
||||
inline _Tp
|
||||
__fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
|
||||
{
|
||||
if (sizeof(T) == sizeof(int32))
|
||||
return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
|
||||
else if (sizeof(T) == sizeof(int64))
|
||||
return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
|
||||
if (sizeof(_Tp) == sizeof(int32))
|
||||
return (_Tp)__fetch_and_add_32((volatile int32*) __ptr, (int32)__addend);
|
||||
else if (sizeof(_Tp) == sizeof(int64))
|
||||
return (_Tp)__fetch_and_add_64((volatile int64*) __ptr, (int64)__addend);
|
||||
else
|
||||
_GLIBCXX_PARALLEL_ASSERT(false);
|
||||
}
|
||||
|
|
@ -195,141 +195,141 @@ namespace __gnu_parallel
|
|||
|
||||
#if defined(__ICC)
|
||||
|
||||
template<typename must_be_int = int>
|
||||
template<typename _MustBeInt = int>
|
||||
inline int32
|
||||
cas32(volatile int32* ptr, int32 old, int32 nw)
|
||||
__cas32(volatile int32* __ptr, int32 __old, int32 __nw)
|
||||
{
|
||||
int32 before;
|
||||
int32 __before;
|
||||
__asm__ __volatile__("lock; cmpxchgl %1,%2"
|
||||
: "=a"(before)
|
||||
: "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
|
||||
: "=a"(__before)
|
||||
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
|
||||
: "memory");
|
||||
return before;
|
||||
return __before;
|
||||
}
|
||||
|
||||
#if defined(__x86_64)
|
||||
template<typename must_be_int = int>
|
||||
template<typename _MustBeInt = int>
|
||||
inline int64
|
||||
cas64(volatile int64 *ptr, int64 old, int64 nw)
|
||||
__cas64(volatile int64 *__ptr, int64 __old, int64 __nw)
|
||||
{
|
||||
int64 before;
|
||||
int64 __before;
|
||||
__asm__ __volatile__("lock; cmpxchgq %1,%2"
|
||||
: "=a"(before)
|
||||
: "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
|
||||
: "=a"(__before)
|
||||
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
|
||||
: "memory");
|
||||
return before;
|
||||
return __before;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
|
||||
* *ptr=replacement and return @c true, return @c false otherwise.
|
||||
/** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
|
||||
* *__ptr=__replacement and return @c true, return @c false otherwise.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to 32-bit signed integer.
|
||||
* @param comparand Compare value.
|
||||
* @param replacement Replacement value.
|
||||
* @param __ptr Pointer to 32-bit signed integer.
|
||||
* @param __comparand Compare value.
|
||||
* @param __replacement Replacement value.
|
||||
*/
|
||||
inline bool
|
||||
compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
|
||||
__compare_and_swap_32(volatile int32* __ptr, int32 __comparand, int32 __replacement)
|
||||
{
|
||||
#if defined(__ICC) //x86 version
|
||||
return _InterlockedCompareExchange((void*)ptr, replacement,
|
||||
comparand) == comparand;
|
||||
return _InterlockedCompareExchange((void*)__ptr, __replacement,
|
||||
__comparand) == __comparand;
|
||||
#elif defined(__ECC) //IA-64 version
|
||||
return _InterlockedCompareExchange((void*)ptr, replacement,
|
||||
comparand) == comparand;
|
||||
return _InterlockedCompareExchange((void*)__ptr, __replacement,
|
||||
__comparand) == __comparand;
|
||||
#elif defined(__ICL) || defined(_MSC_VER)
|
||||
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
|
||||
replacement, comparand) == comparand;
|
||||
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr),
|
||||
__replacement, __comparand) == __comparand;
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
|
||||
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
|
||||
#elif defined(__SUNPRO_CC) && defined(__sparc)
|
||||
return atomic_cas_32((volatile unsigned int*)ptr, comparand,
|
||||
replacement) == comparand;
|
||||
return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
|
||||
__replacement) == __comparand;
|
||||
#else
|
||||
#pragma message("slow compare_and_swap_32")
|
||||
bool res = false;
|
||||
#pragma message("slow __compare_and_swap_32")
|
||||
bool __res = false;
|
||||
#pragma omp critical
|
||||
{
|
||||
if (*ptr == comparand)
|
||||
if (*__ptr == __comparand)
|
||||
{
|
||||
*ptr = replacement;
|
||||
res = true;
|
||||
*__ptr = __replacement;
|
||||
__res = true;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
return __res;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
|
||||
* *ptr=replacement and return @c true, return @c false otherwise.
|
||||
/** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
|
||||
* *__ptr=__replacement and return @c true, return @c false otherwise.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to 64-bit signed integer.
|
||||
* @param comparand Compare value.
|
||||
* @param replacement Replacement value.
|
||||
* @param __ptr Pointer to 64-bit signed integer.
|
||||
* @param __comparand Compare value.
|
||||
* @param __replacement Replacement value.
|
||||
*/
|
||||
inline bool
|
||||
compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
|
||||
__compare_and_swap_64(volatile int64* __ptr, int64 __comparand, int64 __replacement)
|
||||
{
|
||||
#if defined(__ICC) && defined(__x86_64) //x86 version
|
||||
return cas64<int>(ptr, comparand, replacement) == comparand;
|
||||
return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
|
||||
#elif defined(__ECC) //IA-64 version
|
||||
return _InterlockedCompareExchange64((void*)ptr, replacement,
|
||||
comparand) == comparand;
|
||||
return _InterlockedCompareExchange64((void*)__ptr, __replacement,
|
||||
__comparand) == __comparand;
|
||||
#elif defined(__ICL) || defined(_MSC_VER)
|
||||
#ifndef _WIN64
|
||||
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
|
||||
return 0;
|
||||
#else
|
||||
return _InterlockedCompareExchange64(ptr, replacement,
|
||||
comparand) == comparand;
|
||||
return _InterlockedCompareExchange64(__ptr, __replacement,
|
||||
__comparand) == __comparand;
|
||||
#endif
|
||||
|
||||
#elif defined(__GNUC__) && defined(__x86_64)
|
||||
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
|
||||
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
|
||||
#elif defined(__GNUC__) && defined(__i386) && \
|
||||
(defined(__i686) || defined(__pentium4) || defined(__athlon))
|
||||
return __sync_bool_compare_and_swap(ptr, comparand, replacement);
|
||||
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
|
||||
#elif defined(__SUNPRO_CC) && defined(__sparc)
|
||||
return atomic_cas_64((volatile unsigned long long*)ptr,
|
||||
comparand, replacement) == comparand;
|
||||
return atomic_cas_64((volatile unsigned long long*)__ptr,
|
||||
__comparand, __replacement) == __comparand;
|
||||
#else
|
||||
#if defined(__GNUC__) && defined(__i386)
|
||||
// XXX -march=native
|
||||
//#warning "please compile with -march=i686 or better"
|
||||
#endif
|
||||
#pragma message("slow compare_and_swap_64")
|
||||
bool res = false;
|
||||
#pragma message("slow __compare_and_swap_64")
|
||||
bool __res = false;
|
||||
#pragma omp critical
|
||||
{
|
||||
if (*ptr == comparand)
|
||||
if (*__ptr == __comparand)
|
||||
{
|
||||
*ptr = replacement;
|
||||
res = true;
|
||||
*__ptr = __replacement;
|
||||
__res = true;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
return __res;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Compare @c *ptr and @c comparand. If equal, let @c
|
||||
* *ptr=replacement and return @c true, return @c false otherwise.
|
||||
/** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
|
||||
* *__ptr=__replacement and return @c true, return @c false otherwise.
|
||||
*
|
||||
* Implementation is heavily platform-dependent.
|
||||
* @param ptr Pointer to signed integer.
|
||||
* @param comparand Compare value.
|
||||
* @param replacement Replacement value. */
|
||||
template<typename T>
|
||||
* @param __ptr Pointer to signed integer.
|
||||
* @param __comparand Compare value.
|
||||
* @param __replacement Replacement value. */
|
||||
template<typename _Tp>
|
||||
inline bool
|
||||
compare_and_swap(volatile T* ptr, T comparand, T replacement)
|
||||
__compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
|
||||
{
|
||||
if (sizeof(T) == sizeof(int32))
|
||||
return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement);
|
||||
else if (sizeof(T) == sizeof(int64))
|
||||
return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement);
|
||||
if (sizeof(_Tp) == sizeof(int32))
|
||||
return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, (int32)__replacement);
|
||||
else if (sizeof(_Tp) == sizeof(int64))
|
||||
return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, (int64)__replacement);
|
||||
else
|
||||
_GLIBCXX_PARALLEL_ASSERT(false);
|
||||
}
|
||||
|
|
@ -337,7 +337,7 @@ namespace __gnu_parallel
|
|||
/** @brief Yield the control to another thread, without waiting for
|
||||
the end of the time slice. */
|
||||
inline void
|
||||
yield()
|
||||
__yield()
|
||||
{
|
||||
#if defined (_WIN32) && !defined (__CYGWIN__)
|
||||
Sleep(0);
|
||||
|
|
|
|||
|
|
@ -38,15 +38,15 @@
|
|||
|
||||
/** @def _GLIBCXX_CALL
|
||||
* @brief Macro to produce log message when entering a function.
|
||||
* @param n Input size.
|
||||
* @param __n Input size.
|
||||
* @see _GLIBCXX_VERBOSE_LEVEL */
|
||||
#if (_GLIBCXX_VERBOSE_LEVEL == 0)
|
||||
#define _GLIBCXX_CALL(n)
|
||||
#define _GLIBCXX_CALL(__n)
|
||||
#endif
|
||||
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
|
||||
#define _GLIBCXX_CALL(n) \
|
||||
printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", \
|
||||
__PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
|
||||
#define _GLIBCXX_CALL(__n) \
|
||||
printf(" %__s:\niam = %d, __n = %ld, __num_threads = %d\n", \
|
||||
__PRETTY_FUNCTION__, omp_get_thread_num(), (__n), __get_max_threads());
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_SCALE_DOWN_FPU
|
||||
|
|
@ -64,12 +64,12 @@
|
|||
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
|
||||
* Consider the size of the L1 cache for
|
||||
* __gnu_parallel::parallel_random_shuffle(). */
|
||||
* __gnu_parallel::__parallel_random_shuffle(). */
|
||||
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
|
||||
#endif
|
||||
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
|
||||
* Consider the size of the TLB for
|
||||
* __gnu_parallel::parallel_random_shuffle(). */
|
||||
* __gnu_parallel::__parallel_random_shuffle(). */
|
||||
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -33,54 +33,54 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Function to split a sequence into parts of almost equal size.
|
||||
/** @brief Function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* The resulting sequence s of length num_threads+1 contains the splitting
|
||||
* positions when splitting the range [0,n) into parts of almost
|
||||
* The resulting sequence __s of length __num_threads+1 contains the splitting
|
||||
* positions when splitting the range [0,__n) into parts of almost
|
||||
* equal size (plus minus 1). The first entry is 0, the last one
|
||||
* n. There may result empty parts.
|
||||
* @param n Number of elements
|
||||
* @param num_threads Number of parts
|
||||
* @param s Splitters
|
||||
* @returns End of splitter sequence, i. e. @c s+num_threads+1 */
|
||||
template<typename difference_type, typename OutputIterator>
|
||||
OutputIterator
|
||||
equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
|
||||
* __n. Empty parts may result.
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @param __s Splitters
|
||||
* @returns End of splitter sequence, i.e. @c __s+__num_threads+1 */
|
||||
template<typename _DifferenceType, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
|
||||
{
|
||||
difference_type chunk_length = n / num_threads;
|
||||
difference_type num_longer_chunks = n % num_threads;
|
||||
difference_type pos = 0;
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
_DifferenceType __pos = 0;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
*s++ = pos;
|
||||
pos += (i < num_longer_chunks) ? (chunk_length + 1) : chunk_length;
|
||||
*__s++ = __pos;
|
||||
__pos += (__i < __num_longer_chunks) ? (__chunk_length + 1) : __chunk_length;
|
||||
}
|
||||
*s++ = n;
|
||||
return s;
|
||||
*__s++ = __n;
|
||||
return __s;
|
||||
}
|
||||
|
||||
|
||||
/** @brief Function to split a sequence into parts of almost equal size.
|
||||
/** @brief Function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* Returns the position of the splitting point between
|
||||
* thread number thread_no (included) and
|
||||
* thread number thread_no+1 (excluded).
|
||||
* @param n Number of elements
|
||||
* @param num_threads Number of parts
|
||||
* @returns _SplittingAlgorithm point */
|
||||
template<typename difference_type>
|
||||
difference_type
|
||||
equally_split_point(difference_type n,
|
||||
thread_index_t num_threads,
|
||||
thread_index_t thread_no)
|
||||
* thread number __thread_no (included) and
|
||||
* thread number __thread_no+1 (excluded).
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @returns splitting point */
|
||||
template<typename _DifferenceType>
|
||||
_DifferenceType
|
||||
equally_split_point(_DifferenceType __n,
|
||||
_ThreadIndex __num_threads,
|
||||
_ThreadIndex __thread_no)
|
||||
{
|
||||
difference_type chunk_length = n / num_threads;
|
||||
difference_type num_longer_chunks = n % num_threads;
|
||||
if (thread_no < num_longer_chunks)
|
||||
return thread_no * (chunk_length + 1);
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
if (__thread_no < __num_longer_chunks)
|
||||
return __thread_no * (__chunk_length + 1);
|
||||
else
|
||||
return num_longer_chunks * (chunk_length + 1)
|
||||
+ (thread_no - num_longer_chunks) * chunk_length;
|
||||
return __num_longer_chunks * (__chunk_length + 1)
|
||||
+ (__thread_no - __num_longer_chunks) * __chunk_length;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@
|
|||
#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING
|
||||
/** @def _GLIBCXX_TREE_INITIAL_SPLITTING
|
||||
* @brief Include the initial splitting variant for
|
||||
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
|
||||
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end).
|
||||
* @see __gnu_parallel::_Rb_tree */
|
||||
#define _GLIBCXX_TREE_INITIAL_SPLITTING 1
|
||||
#endif
|
||||
|
|
@ -86,7 +86,7 @@
|
|||
#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING
|
||||
/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING
|
||||
* @brief Include the dynamic balancing variant for
|
||||
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
|
||||
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end).
|
||||
* @see __gnu_parallel::_Rb_tree */
|
||||
#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1
|
||||
#endif
|
||||
|
|
@ -94,7 +94,7 @@
|
|||
#ifndef _GLIBCXX_TREE_FULL_COPY
|
||||
/** @def _GLIBCXX_TREE_FULL_COPY
|
||||
* @brief In order to sort the input sequence of
|
||||
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a
|
||||
* _Rb_tree::insert_unique(_IIter __beg, _IIter __end) a
|
||||
* full copy of the input elements is done.
|
||||
* @see __gnu_parallel::_Rb_tree */
|
||||
#define _GLIBCXX_TREE_FULL_COPY 1
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/find.h
|
||||
* @brief Parallel implementation base for std::find(), std::equal()
|
||||
* @brief Parallel implementation base for std::find(), std::equal()
|
||||
* and related functions.
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*/
|
||||
|
|
@ -44,36 +44,36 @@ namespace __gnu_parallel
|
|||
{
|
||||
/**
|
||||
* @brief Parallel std::find, switch for different algorithms.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence. Must have same
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Must have same
|
||||
* length as first sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector Functionality (e.g. std::find_if(), std::equal(), ...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename RandomAccessIterator1,
|
||||
typename RandomAccessIterator2,
|
||||
typename Pred,
|
||||
typename Selector>
|
||||
inline std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred, Selector selector)
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
inline std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector)
|
||||
{
|
||||
switch (_Settings::get().find_algorithm)
|
||||
{
|
||||
case GROWING_BLOCKS:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
growing_blocks_tag());
|
||||
case CONSTANT_SIZE_BLOCKS:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
constant_size_blocks_tag());
|
||||
case EQUAL_SPLIT:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
equal_split_tag());
|
||||
default:
|
||||
_GLIBCXX_PARALLEL_ASSERT(false);
|
||||
return std::make_pair(begin1, begin2);
|
||||
return std::make_pair(__begin1, __begin2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -81,80 +81,80 @@ template<typename RandomAccessIterator1,
|
|||
|
||||
/**
|
||||
* @brief Parallel std::find, equal splitting variant.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence. Second sequence
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second sequence
|
||||
* must have same length as first sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector Functionality (e.g. std::find_if(), std::equal(), ...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename RandomAccessIterator1,
|
||||
typename RandomAccessIterator2,
|
||||
typename Pred,
|
||||
typename Selector>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
find_template(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2,
|
||||
Pred pred,
|
||||
Selector selector,
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2,
|
||||
_Pred __pred,
|
||||
_Selector __selector,
|
||||
equal_split_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(end1 - begin1)
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
difference_type length = end1 - begin1;
|
||||
difference_type result = length;
|
||||
difference_type* borders;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
_DifferenceType __result = __length;
|
||||
_DifferenceType* __borders;
|
||||
|
||||
omp_lock_t result_lock;
|
||||
omp_init_lock(&result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
thread_index_t num_threads = get_max_threads();
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
borders = new difference_type[num_threads + 1];
|
||||
equally_split(length, num_threads, borders);
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__length, __num_threads, __borders);
|
||||
} //single
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
difference_type start = borders[iam], stop = borders[iam + 1];
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __start = __borders[__iam], __stop = __borders[__iam + 1];
|
||||
|
||||
RandomAccessIterator1 i1 = begin1 + start;
|
||||
RandomAccessIterator2 i2 = begin2 + start;
|
||||
for (difference_type pos = start; pos < stop; ++pos)
|
||||
_RAIter1 __i1 = __begin1 + __start;
|
||||
_RAIter2 __i2 = __begin2 + __start;
|
||||
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
|
||||
{
|
||||
#pragma omp flush(result)
|
||||
#pragma omp flush(__result)
|
||||
// Result has been set to something lower.
|
||||
if (result < pos)
|
||||
if (__result < __pos)
|
||||
break;
|
||||
|
||||
if (selector(i1, i2, pred))
|
||||
if (__selector(__i1, __i2, __pred))
|
||||
{
|
||||
omp_set_lock(&result_lock);
|
||||
if (pos < result)
|
||||
result = pos;
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__pos < __result)
|
||||
__result = __pos;
|
||||
omp_unset_lock(&__result_lock);
|
||||
break;
|
||||
}
|
||||
++i1;
|
||||
++i2;
|
||||
++__i1;
|
||||
++__i2;
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&result_lock);
|
||||
delete[] borders;
|
||||
omp_destroy_lock(&__result_lock);
|
||||
delete[] __borders;
|
||||
|
||||
return
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
|
||||
begin2 + result);
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -163,12 +163,12 @@ template<typename RandomAccessIterator1,
|
|||
|
||||
/**
|
||||
* @brief Parallel std::find, growing block size variant.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence. Second sequence
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second sequence
|
||||
* must have same length as first sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector Functionality (e.g. std::find_if(), std::equal(), ...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_initial_block_size
|
||||
|
|
@ -183,105 +183,105 @@ template<typename RandomAccessIterator1,
|
|||
* for CSB, the blocks are allocated in a predetermined manner,
|
||||
* namely spatial round-robin.
|
||||
*/
|
||||
template<typename RandomAccessIterator1,
|
||||
typename RandomAccessIterator2,
|
||||
typename Pred,
|
||||
typename Selector>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred, Selector selector,
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
growing_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(end1 - begin1)
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
difference_type length = end1 - begin1;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
|
||||
difference_type sequential_search_size =
|
||||
std::min<difference_type>(length, __s.find_sequential_search_size);
|
||||
_DifferenceType __sequential_search_size =
|
||||
std::min<_DifferenceType>(__length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
|
||||
selector.sequential_algorithm(
|
||||
begin1, begin1 + sequential_search_size, begin2, pred);
|
||||
std::pair<_RAIter1, _RAIter2> __find_seq_result =
|
||||
__selector._M_sequential_algorithm(
|
||||
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
|
||||
|
||||
if (find_seq_result.first != (begin1 + sequential_search_size))
|
||||
return find_seq_result;
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
|
||||
// Index of beginning of next free block (after sequential find).
|
||||
difference_type next_block_start = sequential_search_size;
|
||||
difference_type result = length;
|
||||
_DifferenceType __next_block_start = __sequential_search_size;
|
||||
_DifferenceType __result = __length;
|
||||
|
||||
omp_lock_t result_lock;
|
||||
omp_init_lock(&result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
thread_index_t num_threads = get_max_threads();
|
||||
# pragma omp parallel shared(result) num_threads(num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
// Not within first k elements -> start parallel.
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
// Not within first k elements -> start parallel.
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
difference_type block_size = __s.find_initial_block_size;
|
||||
difference_type start =
|
||||
fetch_and_add<difference_type>(&next_block_start, block_size);
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
_DifferenceType __start =
|
||||
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
difference_type stop =
|
||||
std::min<difference_type>(length, start + block_size);
|
||||
_DifferenceType __stop =
|
||||
std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
|
||||
while (start < length)
|
||||
while (__start < __length)
|
||||
{
|
||||
# pragma omp flush(result)
|
||||
# pragma omp flush(__result)
|
||||
// Get new value of result.
|
||||
if (result < start)
|
||||
if (__result < __start)
|
||||
{
|
||||
// No chance to find first element.
|
||||
break;
|
||||
}
|
||||
|
||||
local_result = selector.sequential_algorithm(
|
||||
begin1 + start, begin1 + stop, begin2 + start, pred);
|
||||
if (local_result.first != (begin1 + stop))
|
||||
__local_result = __selector._M_sequential_algorithm(
|
||||
__begin1 + __start, __begin1 + __stop, __begin2 + __start, __pred);
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&result_lock);
|
||||
if ((local_result.first - begin1) < result)
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
{
|
||||
result = local_result.first - begin1;
|
||||
__result = __local_result.first - __begin1;
|
||||
|
||||
// Result cannot be in future blocks, stop algorithm.
|
||||
fetch_and_add<difference_type>(&next_block_start, length);
|
||||
__fetch_and_add<_DifferenceType>(&__next_block_start, __length);
|
||||
}
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
block_size =
|
||||
std::min<difference_type>(block_size * __s.find_increasing_factor,
|
||||
__block_size =
|
||||
std::min<_DifferenceType>(__block_size * __s.find_increasing_factor,
|
||||
__s.find_maximum_block_size);
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
start =
|
||||
fetch_and_add<difference_type>(&next_block_start, block_size);
|
||||
stop = ((length < (start + block_size))
|
||||
? length : (start + block_size));
|
||||
__start =
|
||||
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
|
||||
__stop = ((__length < (__start + __block_size))
|
||||
? __length : (__start + __block_size));
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
// Return iterator on found element.
|
||||
return
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
|
||||
begin2 + result);
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -290,12 +290,12 @@ template<typename RandomAccessIterator1,
|
|||
|
||||
/**
|
||||
* @brief Parallel std::find, constant block size variant.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence. Second sequence
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second sequence
|
||||
* must have same length as first sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_block_size
|
||||
|
|
@ -306,94 +306,94 @@ template<typename RandomAccessIterator1,
|
|||
* blocks are allocated in a predetermined manner, namely spatial
|
||||
* round-robin.
|
||||
*/
|
||||
template<typename RandomAccessIterator1,
|
||||
typename RandomAccessIterator2,
|
||||
typename Pred,
|
||||
typename Selector>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred, Selector selector,
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
constant_size_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(end1 - begin1)
|
||||
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
difference_type length = end1 - begin1;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
|
||||
difference_type sequential_search_size = std::min<difference_type>(
|
||||
length, __s.find_sequential_search_size);
|
||||
_DifferenceType __sequential_search_size = std::min<_DifferenceType>(
|
||||
__length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
|
||||
selector.sequential_algorithm(begin1, begin1 + sequential_search_size,
|
||||
begin2, pred);
|
||||
std::pair<_RAIter1, _RAIter2> __find_seq_result =
|
||||
__selector._M_sequential_algorithm(__begin1, __begin1 + __sequential_search_size,
|
||||
__begin2, __pred);
|
||||
|
||||
if (find_seq_result.first != (begin1 + sequential_search_size))
|
||||
return find_seq_result;
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
|
||||
difference_type result = length;
|
||||
omp_lock_t result_lock;
|
||||
omp_init_lock(&result_lock);
|
||||
_DifferenceType __result = __length;
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
// Not within first sequential_search_size elements -> start parallel.
|
||||
// Not within first __sequential_search_size elements -> start parallel.
|
||||
|
||||
thread_index_t num_threads = get_max_threads();
|
||||
# pragma omp parallel shared(result) num_threads(num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
difference_type block_size = __s.find_initial_block_size;
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
|
||||
// First element of thread's current iteration.
|
||||
difference_type iteration_start = sequential_search_size;
|
||||
_DifferenceType __iteration_start = __sequential_search_size;
|
||||
|
||||
// Where to work (initialization).
|
||||
difference_type start = iteration_start + iam * block_size;
|
||||
difference_type stop =
|
||||
std::min<difference_type>(length, start + block_size);
|
||||
_DifferenceType __start = __iteration_start + __iam * __block_size;
|
||||
_DifferenceType __stop =
|
||||
std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
|
||||
while (start < length)
|
||||
while (__start < __length)
|
||||
{
|
||||
// Get new value of result.
|
||||
# pragma omp flush(result)
|
||||
# pragma omp flush(__result)
|
||||
// No chance to find first element.
|
||||
if (result < start)
|
||||
if (__result < __start)
|
||||
break;
|
||||
local_result = selector.sequential_algorithm(
|
||||
begin1 + start, begin1 + stop,
|
||||
begin2 + start, pred);
|
||||
if (local_result.first != (begin1 + stop))
|
||||
__local_result = __selector._M_sequential_algorithm(
|
||||
__begin1 + __start, __begin1 + __stop,
|
||||
__begin2 + __start, __pred);
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&result_lock);
|
||||
if ((local_result.first - begin1) < result)
|
||||
result = local_result.first - begin1;
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
__result = __local_result.first - __begin1;
|
||||
omp_unset_lock(&__result_lock);
|
||||
// Will not find better value in its interval.
|
||||
break;
|
||||
}
|
||||
|
||||
iteration_start += num_threads * block_size;
|
||||
__iteration_start += __num_threads * __block_size;
|
||||
|
||||
// Where to work.
|
||||
start = iteration_start + iam * block_size;
|
||||
stop = std::min<difference_type>(length, start + block_size);
|
||||
__start = __iteration_start + __iam * __block_size;
|
||||
__stop = std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
// Return iterator on found element.
|
||||
return
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
|
||||
begin2 + result);
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/find_selectors.h
|
||||
* @brief Function objects representing different tasks to be plugged
|
||||
* @brief Function objects representing different tasks to be plugged
|
||||
* into the parallel find algorithm.
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*/
|
||||
|
|
@ -39,153 +39,153 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Base class of all __gnu_parallel::find_template selectors. */
|
||||
struct generic_find_selector
|
||||
/** @brief Base class of all __gnu_parallel::__find_template selectors. */
|
||||
struct __generic_find_selector
|
||||
{ };
|
||||
|
||||
/**
|
||||
* @brief Test predicate on a single element, used for std::find()
|
||||
* and std::find_if ().
|
||||
*/
|
||||
struct find_if_selector : public generic_find_selector
|
||||
struct __find_if_selector : public __generic_find_selector
|
||||
{
|
||||
/** @brief Test on one position.
|
||||
* @param i1 Iterator on first sequence.
|
||||
* @param i2 Iterator on second sequence (unused).
|
||||
* @param pred Find predicate.
|
||||
/** @brief Test on one position.
|
||||
* @param __i1 Iterator on first sequence.
|
||||
* @param __i2 Iterator on second sequence (unused).
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
|
||||
{ return pred(*i1); }
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{ return __pred(*__i1); }
|
||||
|
||||
/** @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
sequential_algorithm(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred)
|
||||
{ return std::make_pair(find_if(begin1, end1, pred,
|
||||
sequential_tag()), begin2); }
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{ return std::make_pair(find_if(__begin1, __end1, __pred,
|
||||
sequential_tag()), __begin2); }
|
||||
};
|
||||
|
||||
/** @brief Test predicate on two adjacent elements. */
|
||||
struct adjacent_find_selector : public generic_find_selector
|
||||
/** @brief Test predicate on two adjacent elements. */
|
||||
struct __adjacent_find_selector : public __generic_find_selector
|
||||
{
|
||||
/** @brief Test on one position.
|
||||
* @param i1 Iterator on first sequence.
|
||||
* @param i2 Iterator on second sequence (unused).
|
||||
* @param pred Find predicate.
|
||||
/** @brief Test on one position.
|
||||
* @param __i1 Iterator on first sequence.
|
||||
* @param __i2 Iterator on second sequence (unused).
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{
|
||||
// Passed end iterator is one short.
|
||||
return pred(*i1, *(i1 + 1));
|
||||
return __pred(*__i1, *(__i1 + 1));
|
||||
}
|
||||
|
||||
/** @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
sequential_algorithm(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred)
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{
|
||||
// Passed end iterator is one short.
|
||||
RandomAccessIterator1 spot = adjacent_find(begin1, end1 + 1,
|
||||
pred, sequential_tag());
|
||||
if (spot == (end1 + 1))
|
||||
spot = end1;
|
||||
return std::make_pair(spot, begin2);
|
||||
_RAIter1 spot = adjacent_find(__begin1, __end1 + 1,
|
||||
__pred, sequential_tag());
|
||||
if (spot == (__end1 + 1))
|
||||
spot = __end1;
|
||||
return std::make_pair(spot, __begin2);
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Test inverted predicate on a single element. */
|
||||
struct mismatch_selector : public generic_find_selector
|
||||
struct __mismatch_selector : public __generic_find_selector
|
||||
{
|
||||
/**
|
||||
* @brief Test on one position.
|
||||
* @param i1 Iterator on first sequence.
|
||||
* @param i2 Iterator on second sequence (unused).
|
||||
* @param pred Find predicate.
|
||||
* @brief Test on one position.
|
||||
* @param __i1 Iterator on first sequence.
|
||||
* @param __i2 Iterator on second sequence.
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
|
||||
{ return !pred(*i1, *i2); }
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{ return !__pred(*__i1, *__i2); }
|
||||
|
||||
/**
|
||||
* @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
sequential_algorithm(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred)
|
||||
{ return mismatch(begin1, end1, begin2, pred, sequential_tag()); }
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); }
|
||||
};
|
||||
|
||||
|
||||
/** @brief Test predicate on several elements. */
|
||||
template<typename ForwardIterator>
|
||||
struct find_first_of_selector : public generic_find_selector
|
||||
/** @brief Test predicate on several elements. */
|
||||
template<typename _ForwardIterator>
|
||||
struct __find_first_of_selector : public __generic_find_selector
|
||||
{
|
||||
ForwardIterator begin;
|
||||
ForwardIterator end;
|
||||
_ForwardIterator __begin;
|
||||
_ForwardIterator __end;
|
||||
|
||||
explicit find_first_of_selector(ForwardIterator begin, ForwardIterator end)
|
||||
: begin(begin), end(end) { }
|
||||
explicit __find_first_of_selector(_ForwardIterator __begin, _ForwardIterator __end)
|
||||
: __begin(__begin), __end(__end) { }
|
||||
|
||||
/** @brief Test on one position.
|
||||
* @param i1 Iterator on first sequence.
|
||||
* @param i2 Iterator on second sequence (unused).
|
||||
* @param pred Find predicate. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
/** @brief Test on one position.
|
||||
* @param __i1 Iterator on first sequence.
|
||||
* @param __i2 Iterator on second sequence (unused).
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{
|
||||
for (ForwardIterator pos_in_candidates = begin;
|
||||
pos_in_candidates != end; ++pos_in_candidates)
|
||||
if (pred(*i1, *pos_in_candidates))
|
||||
for (_ForwardIterator __pos_in_candidates = __begin;
|
||||
__pos_in_candidates != __end; ++__pos_in_candidates)
|
||||
if (__pred(*__i1, *__pos_in_candidates))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param pred Find predicate. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename Pred>
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2>
|
||||
sequential_algorithm(RandomAccessIterator1 begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred)
|
||||
{ return std::make_pair(find_first_of(begin1, end1, begin, end, pred,
|
||||
sequential_tag()), begin2); }
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{ return std::make_pair(find_first_of(__begin1, __end1, __begin, __end, __pred,
|
||||
sequential_tag()), __begin2); }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,55 +42,55 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Chose the desired algorithm by evaluating @c parallelism_tag.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param user_op A user-specified functor (comparator, predicate,
|
||||
/** @brief Choose the desired algorithm by evaluating @c __parallelism_tag.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __user_op A user-specified functor (comparator, predicate,
|
||||
* associative operator,...)
|
||||
* @param functionality functor to "process" an element with
|
||||
* user_op (depends on desired functionality, e. g. accumulate,
|
||||
* @param __functionality functor to "process" an element with
|
||||
* __user_op (depends on desired functionality, e. g. accumulate,
|
||||
* for_each,...
|
||||
* @param reduction Reduction functor.
|
||||
* @param reduction_start Initial value for reduction.
|
||||
* @param output Output iterator.
|
||||
* @param bound Maximum number of elements processed.
|
||||
* @param parallelism_tag Parallelization method */
|
||||
template<typename InputIterator, typename UserOp,
|
||||
typename Functionality, typename Red, typename Result>
|
||||
UserOp
|
||||
for_each_template_random_access(InputIterator begin, InputIterator end,
|
||||
UserOp user_op,
|
||||
Functionality& functionality,
|
||||
Red reduction, Result reduction_start,
|
||||
Result& output, typename
|
||||
std::iterator_traits<InputIterator>::
|
||||
difference_type bound,
|
||||
_Parallelism parallelism_tag)
|
||||
* @param __reduction Reduction functor.
|
||||
* @param __reduction_start Initial value for reduction.
|
||||
* @param __output Output iterator.
|
||||
* @param __bound Maximum number of elements processed.
|
||||
* @param __parallelism_tag Parallelization method */
|
||||
template<typename _IIter, typename _UserOp,
|
||||
typename _Functionality, typename _Red, typename _Result>
|
||||
_UserOp
|
||||
__for_each_template_random_access(_IIter __begin, _IIter __end,
|
||||
_UserOp __user_op,
|
||||
_Functionality& __functionality,
|
||||
_Red __reduction, _Result __reduction_start,
|
||||
_Result& __output, typename
|
||||
std::iterator_traits<_IIter>::
|
||||
difference_type __bound,
|
||||
_Parallelism __parallelism_tag)
|
||||
{
|
||||
if (parallelism_tag == parallel_unbalanced)
|
||||
return for_each_template_random_access_ed(begin, end, user_op,
|
||||
functionality, reduction,
|
||||
reduction_start,
|
||||
output, bound);
|
||||
else if (parallelism_tag == parallel_omp_loop)
|
||||
return for_each_template_random_access_omp_loop(begin, end, user_op,
|
||||
functionality,
|
||||
reduction,
|
||||
reduction_start,
|
||||
output, bound);
|
||||
else if (parallelism_tag == parallel_omp_loop_static)
|
||||
return for_each_template_random_access_omp_loop(begin, end, user_op,
|
||||
functionality,
|
||||
reduction,
|
||||
reduction_start,
|
||||
output, bound);
|
||||
if (__parallelism_tag == parallel_unbalanced)
|
||||
return for_each_template_random_access_ed(__begin, __end, __user_op,
|
||||
__functionality, __reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
else if (__parallelism_tag == parallel_omp_loop)
|
||||
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
else if (__parallelism_tag == parallel_omp_loop_static)
|
||||
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
else //e. g. parallel_balanced
|
||||
return for_each_template_random_access_workstealing(begin, end,
|
||||
user_op,
|
||||
functionality,
|
||||
reduction,
|
||||
reduction_start,
|
||||
output, bound);
|
||||
return for_each_template_random_access_workstealing(__begin, __end,
|
||||
__user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,192 +38,192 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
|
||||
/** @brief Generic selector for embarrassingly parallel functions. */
|
||||
template<typename It>
|
||||
struct generic_for_each_selector
|
||||
/** @brief Generic selector for embarrassingly parallel functions. */
|
||||
template<typename _It>
|
||||
struct __generic_for_each_selector
|
||||
{
|
||||
/** @brief Iterator on last element processed; needed for some
|
||||
/** @brief Iterator on last element processed; needed for some
|
||||
* algorithms (e. g. std::transform()).
|
||||
*/
|
||||
It finish_iterator;
|
||||
_It finish_iterator;
|
||||
};
|
||||
|
||||
|
||||
/** @brief std::for_each() selector. */
|
||||
template<typename It>
|
||||
struct for_each_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __for_each_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object. */
|
||||
template<typename Op>
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename _Op>
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
o(*i);
|
||||
__o(*__i);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::generate() selector. */
|
||||
template<typename It>
|
||||
struct generate_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __generate_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object. */
|
||||
template<typename Op>
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename _Op>
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
*i = o();
|
||||
*__i = __o();
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::fill() selector. */
|
||||
template<typename It>
|
||||
struct fill_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __fill_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param v Current value.
|
||||
* @param i Iterator referencing object. */
|
||||
* @param __v Current value.
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename Val>
|
||||
bool
|
||||
operator()(Val& v, It i)
|
||||
operator()(Val& __v, _It __i)
|
||||
{
|
||||
*i = v;
|
||||
*__i = __v;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::transform() selector, one input sequence variant. */
|
||||
template<typename It>
|
||||
struct transform1_selector : public generic_for_each_selector<It>
|
||||
/** @brief std::transform() selector, one input sequence variant. */
|
||||
template<typename _It>
|
||||
struct __transform1_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object. */
|
||||
template<typename Op>
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename _Op>
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
*i.second = o(*i.first);
|
||||
*__i.second = __o(*__i.first);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::transform() selector, two input sequences variant. */
|
||||
template<typename It>
|
||||
struct transform2_selector : public generic_for_each_selector<It>
|
||||
/** @brief std::transform() selector, two input sequences variant. */
|
||||
template<typename _It>
|
||||
struct __transform2_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object. */
|
||||
template<typename Op>
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename _Op>
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
*i.third = o(*i.first, *i.second);
|
||||
*__i.__third = __o(*__i.__first, *__i.__second);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::replace() selector. */
|
||||
template<typename It, typename T>
|
||||
struct replace_selector : public generic_for_each_selector<It>
|
||||
template<typename _It, typename _Tp>
|
||||
struct __replace_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Value to replace with. */
|
||||
const T& new_val;
|
||||
const _Tp& __new_val;
|
||||
|
||||
/** @brief Constructor
|
||||
* @param new_val Value to replace with. */
|
||||
* @param __new_val Value to replace with. */
|
||||
explicit
|
||||
replace_selector(const T &new_val) : new_val(new_val) {}
|
||||
__replace_selector(const _Tp &__new_val) : __new_val(__new_val) {}
|
||||
|
||||
/** @brief Functor execution.
|
||||
* @param v Current value.
|
||||
* @param i Iterator referencing object. */
|
||||
* @param __v Current value.
|
||||
* @param __i iterator referencing object. */
|
||||
bool
|
||||
operator()(T& v, It i)
|
||||
operator()(_Tp& __v, _It __i)
|
||||
{
|
||||
if (*i == v)
|
||||
*i = new_val;
|
||||
if (*__i == __v)
|
||||
*__i = __new_val;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::replace_if() selector. */
|
||||
template<typename It, typename Op, typename T>
|
||||
struct replace_if_selector : public generic_for_each_selector<It>
|
||||
template<typename _It, typename _Op, typename _Tp>
|
||||
struct __replace_if_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Value to replace with. */
|
||||
const T& new_val;
|
||||
const _Tp& __new_val;
|
||||
|
||||
/** @brief Constructor.
|
||||
* @param new_val Value to replace with. */
|
||||
* @param __new_val Value to replace with. */
|
||||
explicit
|
||||
replace_if_selector(const T &new_val) : new_val(new_val) { }
|
||||
__replace_if_selector(const _Tp &__new_val) : __new_val(__new_val) { }
|
||||
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object. */
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object. */
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
if (o(*i))
|
||||
*i = new_val;
|
||||
if (__o(*__i))
|
||||
*__i = __new_val;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief std::count() selector. */
|
||||
template<typename It, typename Diff>
|
||||
struct count_selector : public generic_for_each_selector<It>
|
||||
template<typename _It, typename _Diff>
|
||||
struct __count_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param v Current value.
|
||||
* @param i Iterator referencing object.
|
||||
* @param __v Current value.
|
||||
* @param __i iterator referencing object.
|
||||
* @return 1 if count, 0 if does not count. */
|
||||
template<typename Val>
|
||||
Diff
|
||||
operator()(Val& v, It i)
|
||||
{ return (v == *i) ? 1 : 0; }
|
||||
_Diff
|
||||
operator()(Val& __v, _It __i)
|
||||
{ return (__v == *__i) ? 1 : 0; }
|
||||
};
|
||||
|
||||
/** @brief std::count_if () selector. */
|
||||
template<typename It, typename Diff>
|
||||
struct count_if_selector : public generic_for_each_selector<It>
|
||||
template<typename _It, typename _Diff>
|
||||
struct __count_if_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator.
|
||||
* @param i Iterator referencing object.
|
||||
* @param __o Operator.
|
||||
* @param __i iterator referencing object.
|
||||
* @return 1 if count, 0 if does not count. */
|
||||
template<typename Op>
|
||||
Diff
|
||||
operator()(Op& o, It i)
|
||||
{ return (o(*i)) ? 1 : 0; }
|
||||
template<typename _Op>
|
||||
_Diff
|
||||
operator()(_Op& __o, _It __i)
|
||||
{ return (__o(*__i)) ? 1 : 0; }
|
||||
};
|
||||
|
||||
/** @brief std::accumulate() selector. */
|
||||
template<typename It>
|
||||
struct accumulate_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __accumulate_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator (unused).
|
||||
* @param i Iterator referencing object.
|
||||
* @param __o Operator (unused).
|
||||
* @param __i iterator referencing object.
|
||||
* @return The current value. */
|
||||
template<typename Op>
|
||||
typename std::iterator_traits<It>::value_type operator()(Op o, It i)
|
||||
{ return *i; }
|
||||
template<typename _Op>
|
||||
typename std::iterator_traits<_It>::value_type operator()(_Op __o, _It __i)
|
||||
{ return *__i; }
|
||||
};
|
||||
|
||||
/** @brief std::inner_product() selector. */
|
||||
template<typename It, typename It2, typename T>
|
||||
struct inner_product_selector : public generic_for_each_selector<It>
|
||||
template<typename _It, typename It2, typename _Tp>
|
||||
struct __inner_product_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Begin iterator of first sequence. */
|
||||
It begin1_iterator;
|
||||
_It __begin1_iterator;
|
||||
|
||||
/** @brief Begin iterator of second sequence. */
|
||||
It2 begin2_iterator;
|
||||
|
|
@ -232,50 +232,50 @@ namespace __gnu_parallel
|
|||
* @param b1 Begin iterator of first sequence.
|
||||
* @param b2 Begin iterator of second sequence. */
|
||||
explicit
|
||||
inner_product_selector(It b1, It2 b2)
|
||||
: begin1_iterator(b1), begin2_iterator(b2) { }
|
||||
__inner_product_selector(_It b1, It2 b2)
|
||||
: __begin1_iterator(b1), begin2_iterator(b2) { }
|
||||
|
||||
/** @brief Functor execution.
|
||||
* @param mult Multiplication functor.
|
||||
* @param current Iterator referencing object.
|
||||
* @return Inner product elemental result. */
|
||||
template<typename Op>
|
||||
T
|
||||
operator()(Op mult, It current)
|
||||
* @param __mult Multiplication functor.
|
||||
* @param __current iterator referencing object.
|
||||
* @return Inner product elemental result. */
|
||||
template<typename _Op>
|
||||
_Tp
|
||||
operator()(_Op __mult, _It __current)
|
||||
{
|
||||
typename std::iterator_traits<It>::difference_type position
|
||||
= current - begin1_iterator;
|
||||
return mult(*current, *(begin2_iterator + position));
|
||||
typename std::iterator_traits<_It>::difference_type __position
|
||||
= __current - __begin1_iterator;
|
||||
return __mult(*__current, *(begin2_iterator + __position));
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Selector that just returns the passed iterator. */
|
||||
template<typename It>
|
||||
struct identity_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __identity_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param o Operator (unused).
|
||||
* @param i Iterator referencing object.
|
||||
* @param __o Operator (unused).
|
||||
* @param __i iterator referencing object.
|
||||
* @return Passed iterator. */
|
||||
template<typename Op>
|
||||
It
|
||||
operator()(Op o, It i)
|
||||
{ return i; }
|
||||
template<typename _Op>
|
||||
_It
|
||||
operator()(_Op __o, _It __i)
|
||||
{ return __i; }
|
||||
};
|
||||
|
||||
/** @brief Selector that returns the difference between two adjacent
|
||||
* elements.
|
||||
* elements.
|
||||
*/
|
||||
template<typename It>
|
||||
struct adjacent_difference_selector : public generic_for_each_selector<It>
|
||||
template<typename _It>
|
||||
struct __adjacent_difference_selector : public __generic_for_each_selector<_It>
|
||||
{
|
||||
template<typename Op>
|
||||
template<typename _Op>
|
||||
bool
|
||||
operator()(Op& o, It i)
|
||||
operator()(_Op& __o, _It __i)
|
||||
{
|
||||
typename It::first_type go_back_one = i.first;
|
||||
--go_back_one;
|
||||
*i.second = o(*i.first, *go_back_one);
|
||||
typename _It::first_type __go_back_one = __i.first;
|
||||
--__go_back_one;
|
||||
*__i.__second = __o(*__i.__first, *__go_back_one);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
|
@ -283,77 +283,77 @@ namespace __gnu_parallel
|
|||
// XXX move into type_traits?
|
||||
/** @brief Functor doing nothing
|
||||
*
|
||||
* For some reduction tasks (this is not a function object, but is
|
||||
* passed as selector dummy parameter.
|
||||
* For some reduction tasks (this is not a function object, but is
|
||||
* passed as selector dummy parameter).
|
||||
*/
|
||||
struct nothing
|
||||
struct _Nothing
|
||||
{
|
||||
/** @brief Functor execution.
|
||||
* @param i Iterator referencing object. */
|
||||
template<typename It>
|
||||
* @param __i iterator referencing object. */
|
||||
template<typename _It>
|
||||
void
|
||||
operator()(It i) { }
|
||||
operator()(_It __i) { }
|
||||
};
|
||||
|
||||
/** @brief Reduction function doing nothing. */
|
||||
struct dummy_reduct
|
||||
struct _DummyReduct
|
||||
{
|
||||
bool
|
||||
operator()(bool /*x*/, bool /*y*/) const
|
||||
operator()(bool /*__x*/, bool /*__y*/) const
|
||||
{ return true; }
|
||||
};
|
||||
|
||||
/** @brief Reduction for finding the minimum element, using a comparator. */
|
||||
template<typename Comp, typename It>
|
||||
struct min_element_reduct
|
||||
template<typename _Compare, typename _It>
|
||||
struct __min_element_reduct
|
||||
{
|
||||
Comp& comp;
|
||||
_Compare& __comp;
|
||||
|
||||
explicit
|
||||
min_element_reduct(Comp &c) : comp(c) { }
|
||||
__min_element_reduct(_Compare &__c) : __comp(__c) { }
|
||||
|
||||
It
|
||||
operator()(It x, It y)
|
||||
_It
|
||||
operator()(_It __x, _It __y)
|
||||
{
|
||||
if (comp(*x, *y))
|
||||
return x;
|
||||
if (__comp(*__x, *__y))
|
||||
return __x;
|
||||
else
|
||||
return y;
|
||||
return __y;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Reduction for finding the maximum element, using a comparator. */
|
||||
template<typename Comp, typename It>
|
||||
struct max_element_reduct
|
||||
template<typename _Compare, typename _It>
|
||||
struct __max_element_reduct
|
||||
{
|
||||
Comp& comp;
|
||||
_Compare& __comp;
|
||||
|
||||
explicit
|
||||
max_element_reduct(Comp& c) : comp(c) { }
|
||||
__max_element_reduct(_Compare& __c) : __comp(__c) { }
|
||||
|
||||
It
|
||||
operator()(It x, It y)
|
||||
_It
|
||||
operator()(_It __x, _It __y)
|
||||
{
|
||||
if (comp(*x, *y))
|
||||
return y;
|
||||
if (__comp(*__x, *__y))
|
||||
return __y;
|
||||
else
|
||||
return x;
|
||||
return __x;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief General reduction, using a binary operator. */
|
||||
template<typename BinOp>
|
||||
struct accumulate_binop_reduct
|
||||
template<typename _BinOp>
|
||||
struct __accumulate_binop_reduct
|
||||
{
|
||||
BinOp& binop;
|
||||
_BinOp& __binop;
|
||||
|
||||
explicit
|
||||
accumulate_binop_reduct(BinOp& b) : binop(b) { }
|
||||
__accumulate_binop_reduct(_BinOp& __b) : __binop(__b) { }
|
||||
|
||||
template<typename Result, typename Addend>
|
||||
Result
|
||||
operator()(const Result& x, const Addend& y)
|
||||
{ return binop(x, y); }
|
||||
template<typename _Result, typename _Addend>
|
||||
_Result
|
||||
operator()(const _Result& __x, const _Addend& __y)
|
||||
{ return __binop(__x, __y); }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,160 +40,160 @@ namespace __gnu_parallel
|
|||
/** @brief A pair of iterators. The usual iterator operations are
|
||||
* applied to both child iterators.
|
||||
*/
|
||||
template<typename Iterator1, typename Iterator2, typename IteratorCategory>
|
||||
class iterator_pair : public std::pair<Iterator1, Iterator2>
|
||||
template<typename _Iterator1, typename _Iterator2, typename _IteratorCategory>
|
||||
class _IteratorPair : public std::pair<_Iterator1, _Iterator2>
|
||||
{
|
||||
private:
|
||||
typedef iterator_pair<Iterator1, Iterator2, IteratorCategory> type;
|
||||
typedef std::pair<Iterator1, Iterator2> base_type;
|
||||
typedef _IteratorPair<_Iterator1, _Iterator2, _IteratorCategory> _Self;
|
||||
typedef std::pair<_Iterator1, _Iterator2> _Base;
|
||||
|
||||
public:
|
||||
typedef IteratorCategory iterator_category;
|
||||
typedef _IteratorCategory iterator_category;
|
||||
typedef void value_type;
|
||||
|
||||
typedef std::iterator_traits<Iterator1> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef type* pointer;
|
||||
typedef type& reference;
|
||||
typedef std::iterator_traits<_Iterator1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type difference_type;
|
||||
typedef _Self* pointer;
|
||||
typedef _Self& reference;
|
||||
|
||||
iterator_pair() { }
|
||||
_IteratorPair() { }
|
||||
|
||||
iterator_pair(const Iterator1& first, const Iterator2& second)
|
||||
: base_type(first, second) { }
|
||||
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
|
||||
: _Base(__first, __second) { }
|
||||
|
||||
// Pre-increment operator.
|
||||
type&
|
||||
_Self&
|
||||
operator++()
|
||||
{
|
||||
++base_type::first;
|
||||
++base_type::second;
|
||||
++_Base::first;
|
||||
++_Base::second;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Post-increment operator.
|
||||
const type
|
||||
const _Self
|
||||
operator++(int)
|
||||
{ return type(base_type::first++, base_type::second++); }
|
||||
{ return _Self(_Base::first++, _Base::second++); }
|
||||
|
||||
// Pre-decrement operator.
|
||||
type&
|
||||
_Self&
|
||||
operator--()
|
||||
{
|
||||
--base_type::first;
|
||||
--base_type::second;
|
||||
--_Base::first;
|
||||
--_Base::second;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Post-decrement operator.
|
||||
const type
|
||||
const _Self
|
||||
operator--(int)
|
||||
{ return type(base_type::first--, base_type::second--); }
|
||||
{ return _Self(_Base::first--, _Base::second--); }
|
||||
|
||||
// Type conversion.
|
||||
operator Iterator2() const
|
||||
{ return base_type::second; }
|
||||
operator _Iterator2() const
|
||||
{ return _Base::second; }
|
||||
|
||||
type&
|
||||
operator=(const type& other)
|
||||
_Self&
|
||||
operator=(const _Self& __other)
|
||||
{
|
||||
base_type::first = other.first;
|
||||
base_type::second = other.second;
|
||||
_Base::first = __other.first;
|
||||
_Base::second = __other.second;
|
||||
return *this;
|
||||
}
|
||||
|
||||
type
|
||||
operator+(difference_type delta) const
|
||||
{ return type(base_type::first + delta, base_type::second + delta); }
|
||||
_Self
|
||||
operator+(difference_type __delta) const
|
||||
{ return _Self(_Base::first + __delta, _Base::second + __delta); }
|
||||
|
||||
difference_type
|
||||
operator-(const type& other) const
|
||||
{ return base_type::first - other.first; }
|
||||
operator-(const _Self& __other) const
|
||||
{ return _Base::first - __other.first; }
|
||||
};
|
||||
|
||||
|
||||
/** @brief A triple of iterators. The usual iterator operations are
|
||||
applied to all three child iterators.
|
||||
*/
|
||||
template<typename Iterator1, typename Iterator2, typename Iterator3,
|
||||
typename IteratorCategory>
|
||||
class iterator_triple
|
||||
template<typename _Iterator1, typename _Iterator2, typename _Iterator3,
|
||||
typename _IteratorCategory>
|
||||
class _IteratorTriple
|
||||
{
|
||||
private:
|
||||
typedef iterator_triple<Iterator1, Iterator2, Iterator3,
|
||||
IteratorCategory> type;
|
||||
typedef _IteratorTriple<_Iterator1, _Iterator2, _Iterator3,
|
||||
_IteratorCategory> _Self;
|
||||
|
||||
public:
|
||||
typedef IteratorCategory iterator_category;
|
||||
typedef _IteratorCategory iterator_category;
|
||||
typedef void value_type;
|
||||
typedef typename std::iterator_traits<Iterator1>::difference_type
|
||||
typedef typename std::iterator_traits<_Iterator1>::difference_type
|
||||
difference_type;
|
||||
typedef type* pointer;
|
||||
typedef type& reference;
|
||||
typedef _Self* pointer;
|
||||
typedef _Self& reference;
|
||||
|
||||
Iterator1 first;
|
||||
Iterator2 second;
|
||||
Iterator3 third;
|
||||
_Iterator1 __first;
|
||||
_Iterator2 __second;
|
||||
_Iterator3 __third;
|
||||
|
||||
iterator_triple() { }
|
||||
_IteratorTriple() { }
|
||||
|
||||
iterator_triple(const Iterator1& _first, const Iterator2& _second,
|
||||
const Iterator3& _third)
|
||||
_IteratorTriple(const _Iterator1& _first, const _Iterator2& _second,
|
||||
const _Iterator3& _third)
|
||||
{
|
||||
first = _first;
|
||||
second = _second;
|
||||
third = _third;
|
||||
__first = _first;
|
||||
__second = _second;
|
||||
__third = _third;
|
||||
}
|
||||
|
||||
// Pre-increment operator.
|
||||
type&
|
||||
_Self&
|
||||
operator++()
|
||||
{
|
||||
++first;
|
||||
++second;
|
||||
++third;
|
||||
++__first;
|
||||
++__second;
|
||||
++__third;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Post-increment operator.
|
||||
const type
|
||||
const _Self
|
||||
operator++(int)
|
||||
{ return type(first++, second++, third++); }
|
||||
{ return _Self(__first++, __second++, __third++); }
|
||||
|
||||
// Pre-decrement operator.
|
||||
type&
|
||||
_Self&
|
||||
operator--()
|
||||
{
|
||||
--first;
|
||||
--second;
|
||||
--third;
|
||||
--__first;
|
||||
--__second;
|
||||
--__third;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Post-decrement operator.
|
||||
const type
|
||||
const _Self
|
||||
operator--(int)
|
||||
{ return type(first--, second--, third--); }
|
||||
{ return _Self(__first--, __second--, __third--); }
|
||||
|
||||
// Type conversion.
|
||||
operator Iterator3() const
|
||||
{ return third; }
|
||||
operator _Iterator3() const
|
||||
{ return __third; }
|
||||
|
||||
type&
|
||||
operator=(const type& other)
|
||||
_Self&
|
||||
operator=(const _Self& __other)
|
||||
{
|
||||
first = other.first;
|
||||
second = other.second;
|
||||
third = other.third;
|
||||
__first = __other.__first;
|
||||
__second = __other.__second;
|
||||
__third = __other.__third;
|
||||
return *this;
|
||||
}
|
||||
|
||||
type
|
||||
operator+(difference_type delta) const
|
||||
{ return type(first + delta, second + delta, third + delta); }
|
||||
_Self
|
||||
operator+(difference_type __delta) const
|
||||
{ return _Self(__first + __delta, __second + __delta, __third + __delta); }
|
||||
|
||||
difference_type
|
||||
operator-(const type& other) const
|
||||
{ return first - other.first; }
|
||||
operator-(const _Self& __other) const
|
||||
{ return __first - __other.__first; }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,12 +3,12 @@
|
|||
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the terms
|
||||
// software; you can redistribute it and/or modify it under the terms
|
||||
// of the GNU General Public License as published by the Free Software
|
||||
// Foundation; either version 3, or (at your option) any later
|
||||
// version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful, but
|
||||
// This library is distributed in the hope that it will be useful, but
|
||||
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// General Public License for more details.
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/list_partition.h
|
||||
* @brief Functionality to split sequence referenced by only input
|
||||
* @brief Functionality to split sequence referenced by only input
|
||||
* iterators.
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*/
|
||||
|
|
@ -39,137 +39,137 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Shrinks and doubles the ranges.
|
||||
* @param os_starts Start positions worked on (oversampled).
|
||||
* @param count_to_two Counts up to 2.
|
||||
* @param range_length Current length of a chunk.
|
||||
* @param make_twice Whether the @c os_starts is allowed to be
|
||||
* @param __os_starts Start positions worked on (oversampled).
|
||||
* @param __count_to_two Counts up to 2.
|
||||
* @param __range_length Current length of a chunk.
|
||||
* @param __make_twice Whether the @c __os_starts is allowed to be
|
||||
* grown or not
|
||||
*/
|
||||
template<typename InputIterator>
|
||||
template<typename _IIter>
|
||||
void
|
||||
shrink_and_double(std::vector<InputIterator>& os_starts,
|
||||
size_t& count_to_two, size_t& range_length,
|
||||
const bool make_twice)
|
||||
__shrink_and_double(std::vector<_IIter>& __os_starts,
|
||||
size_t& __count_to_two, size_t& __range_length,
|
||||
const bool __make_twice)
|
||||
{
|
||||
++count_to_two;
|
||||
if (not make_twice or count_to_two < 2)
|
||||
shrink(os_starts, count_to_two, range_length);
|
||||
++__count_to_two;
|
||||
if (not __make_twice or __count_to_two < 2)
|
||||
__shrink(__os_starts, __count_to_two, __range_length);
|
||||
else
|
||||
{
|
||||
os_starts.resize((os_starts.size() - 1) * 2 + 1);
|
||||
count_to_two = 0;
|
||||
__os_starts.resize((__os_starts.size() - 1) * 2 + 1);
|
||||
__count_to_two = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Combines two ranges into one and thus halves the number of ranges.
|
||||
* @param os_starts Start positions worked on (oversampled).
|
||||
* @param count_to_two Counts up to 2.
|
||||
* @param range_length Current length of a chunk. */
|
||||
template<typename InputIterator>
|
||||
* @param __os_starts Start positions worked on (oversampled).
|
||||
* @param __count_to_two Counts up to 2.
|
||||
* @param __range_length Current length of a chunk. */
|
||||
template<typename _IIter>
|
||||
void
|
||||
shrink(std::vector<InputIterator>& os_starts, size_t& count_to_two,
|
||||
size_t& range_length)
|
||||
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
|
||||
size_t& __range_length)
|
||||
{
|
||||
for (typename std::vector<InputIterator>::size_type i = 0;
|
||||
i <= (os_starts.size() / 2); ++i)
|
||||
os_starts[i] = os_starts[i * 2];
|
||||
range_length *= 2;
|
||||
for (typename std::vector<_IIter>::size_type __i = 0;
|
||||
__i <= (__os_starts.size() / 2); ++__i)
|
||||
__os_starts[__i] = __os_starts[__i * 2];
|
||||
__range_length *= 2;
|
||||
}
|
||||
|
||||
/** @brief Splits a sequence given by input iterators into parts of
|
||||
* almost equal size
|
||||
*
|
||||
* The function needs only one pass over the sequence.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param starts Start iterators for the resulting parts, dimension
|
||||
* @c num_parts+1. For convenience, @c starts @c [num_parts]
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __starts Start iterators for the resulting parts, dimension
|
||||
* @c __num_parts+1. For convenience, @c __starts @c [__num_parts]
|
||||
* contains the end iterator of the sequence.
|
||||
* @param lengths Length of the resulting parts.
|
||||
* @param num_parts Number of parts to split the sequence into.
|
||||
* @param f Functor to be applied to each element by traversing it
|
||||
* @param oversampling Oversampling factor. If 0, then the
|
||||
* partitions will differ in at most @f$ \sqrt{\mathrm{end} -
|
||||
* \mathrm{begin}} @f$ elements. Otherwise, the ratio between the
|
||||
* longest and the shortest part is bounded by @f$
|
||||
* 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$.
|
||||
* @param __lengths Length of the resulting parts.
|
||||
* @param __num_parts Number of parts to split the sequence into.
|
||||
* @param __f Functor to be applied to each element by traversing it
|
||||
* @param __oversampling Oversampling factor. If 0, then the
|
||||
* partitions will differ in at most @f$ \sqrt{\mathrm{end} -
|
||||
* \mathrm{begin}} @f$ elements. Otherwise, the ratio between the
|
||||
* longest and the shortest part is bounded by @f$
|
||||
* 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$.
|
||||
* @return Length of the whole sequence.
|
||||
*/
|
||||
template<typename InputIterator, typename FunctorType>
|
||||
template<typename _IIter, typename _FunctorType>
|
||||
size_t
|
||||
list_partition(const InputIterator begin, const InputIterator end,
|
||||
InputIterator* starts, size_t* lengths, const int num_parts,
|
||||
FunctorType& f, int oversampling = 0)
|
||||
list_partition(const _IIter __begin, const _IIter __end,
|
||||
_IIter* __starts, size_t* __lengths, const int __num_parts,
|
||||
_FunctorType& __f, int __oversampling = 0)
|
||||
{
|
||||
bool make_twice = false;
|
||||
bool __make_twice = false;
|
||||
|
||||
// The resizing algorithm is chosen according to the oversampling factor.
|
||||
if (oversampling == 0)
|
||||
if (__oversampling == 0)
|
||||
{
|
||||
make_twice = true;
|
||||
oversampling = 1;
|
||||
__make_twice = true;
|
||||
__oversampling = 1;
|
||||
}
|
||||
|
||||
std::vector<InputIterator> os_starts(2 * oversampling * num_parts + 1);
|
||||
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
|
||||
|
||||
os_starts[0]= begin;
|
||||
InputIterator prev = begin, it = begin;
|
||||
size_t dist_limit = 0, dist = 0;
|
||||
size_t cur = 1, next = 1;
|
||||
size_t range_length = 1;
|
||||
size_t count_to_two = 0;
|
||||
while (it != end)
|
||||
__os_starts[0]= __begin;
|
||||
_IIter __prev = __begin, __it = __begin;
|
||||
size_t __dist_limit = 0, __dist = 0;
|
||||
size_t __cur = 1, __next = 1;
|
||||
size_t __range_length = 1;
|
||||
size_t __count_to_two = 0;
|
||||
while (__it != __end)
|
||||
{
|
||||
cur = next;
|
||||
for (; cur < os_starts.size() and it != end; ++cur)
|
||||
__cur = __next;
|
||||
for (; __cur < __os_starts.size() and __it != __end; ++__cur)
|
||||
{
|
||||
for (dist_limit += range_length;
|
||||
dist < dist_limit and it != end; ++dist)
|
||||
for (__dist_limit += __range_length;
|
||||
__dist < __dist_limit and __it != __end; ++__dist)
|
||||
{
|
||||
f(it);
|
||||
++it;
|
||||
__f(__it);
|
||||
++__it;
|
||||
}
|
||||
os_starts[cur] = it;
|
||||
__os_starts[__cur] = __it;
|
||||
}
|
||||
|
||||
// Must compare for end and not cur < os_starts.size() , because
|
||||
// cur could be == os_starts.size() as well
|
||||
if (it == end)
|
||||
// Must compare for end and not __cur < __os_starts.size() , because
|
||||
// __cur could be == __os_starts.size() as well
|
||||
if (__it == __end)
|
||||
break;
|
||||
|
||||
shrink_and_double(os_starts, count_to_two, range_length, make_twice);
|
||||
next = os_starts.size() / 2 + 1;
|
||||
__shrink_and_double(__os_starts, __count_to_two, __range_length, __make_twice);
|
||||
__next = __os_starts.size() / 2 + 1;
|
||||
}
|
||||
|
||||
// Calculation of the parts (one must be extracted from current
|
||||
// because the partition beginning at end, consists only of
|
||||
// Calculation of the parts (one must be extracted from __cur
|
||||
// because the partition beginning at __end, consists only of
|
||||
// itself).
|
||||
size_t size_part = (cur - 1) / num_parts;
|
||||
int size_greater = static_cast<int>((cur - 1) % num_parts);
|
||||
starts[0] = os_starts[0];
|
||||
size_t __size_part = (__cur - 1) / __num_parts;
|
||||
int __size_greater = static_cast<int>((__cur - 1) % __num_parts);
|
||||
__starts[0] = __os_starts[0];
|
||||
|
||||
size_t index = 0;
|
||||
size_t __index = 0;
|
||||
|
||||
// Smallest partitions.
|
||||
for (int i = 1; i < (num_parts + 1 - size_greater); ++i)
|
||||
for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i)
|
||||
{
|
||||
lengths[i - 1] = size_part * range_length;
|
||||
index += size_part;
|
||||
starts[i] = os_starts[index];
|
||||
__lengths[__i - 1] = __size_part * __range_length;
|
||||
__index += __size_part;
|
||||
__starts[__i] = __os_starts[__index];
|
||||
}
|
||||
|
||||
// Biggest partitions.
|
||||
for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i)
|
||||
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; ++__i)
|
||||
{
|
||||
lengths[i - 1] = (size_part+1) * range_length;
|
||||
index += (size_part+1);
|
||||
starts[i] = os_starts[index];
|
||||
__lengths[__i - 1] = (__size_part+1) * __range_length;
|
||||
__index += (__size_part+1);
|
||||
__starts[__i] = __os_starts[__index];
|
||||
}
|
||||
|
||||
// Correction of the end size (the end iteration has not finished).
|
||||
lengths[num_parts - 1] -= (dist_limit - dist);
|
||||
__lengths[__num_parts - 1] -= (__dist_limit - __dist);
|
||||
|
||||
return dist;
|
||||
return __dist;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -37,224 +37,224 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Merge routine being able to merge only the @c max_length
|
||||
/** @brief Merge routine being able to merge only the @c __max_length
|
||||
* smallest elements.
|
||||
*
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param target Target begin iterator.
|
||||
* @param max_length Maximum number of elements to merge.
|
||||
* @param comp Comparator.
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __target Target begin iterator.
|
||||
* @param __max_length Maximum number of elements to merge.
|
||||
* @param __comp Comparator.
|
||||
* @return Output end iterator. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename OutputIterator, typename _DifferenceTp,
|
||||
typename Comparator>
|
||||
OutputIterator
|
||||
merge_advance_usual(RandomAccessIterator1& begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2& begin2,
|
||||
RandomAccessIterator2 end2, OutputIterator target,
|
||||
_DifferenceTp max_length, Comparator comp)
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _OutputIterator, typename _DifferenceTp,
|
||||
typename _Compare>
|
||||
_OutputIterator
|
||||
__merge_advance_usual(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
_RAIter2 __end2, _OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
{
|
||||
typedef _DifferenceTp difference_type;
|
||||
while (begin1 != end1 && begin2 != end2 && max_length > 0)
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
|
||||
{
|
||||
// array1[i1] < array0[i0]
|
||||
if (comp(*begin2, *begin1))
|
||||
*target++ = *begin2++;
|
||||
// array1[i1] < array0[i0]
|
||||
if (__comp(*__begin2, *__begin1))
|
||||
*__target++ = *__begin2++;
|
||||
else
|
||||
*target++ = *begin1++;
|
||||
--max_length;
|
||||
*__target++ = *__begin1++;
|
||||
--__max_length;
|
||||
}
|
||||
|
||||
if (begin1 != end1)
|
||||
if (__begin1 != __end1)
|
||||
{
|
||||
target = std::copy(begin1, begin1 + max_length, target);
|
||||
begin1 += max_length;
|
||||
__target = std::copy(__begin1, __begin1 + __max_length, __target);
|
||||
__begin1 += __max_length;
|
||||
}
|
||||
else
|
||||
{
|
||||
target = std::copy(begin2, begin2 + max_length, target);
|
||||
begin2 += max_length;
|
||||
__target = std::copy(__begin2, __begin2 + __max_length, __target);
|
||||
__begin2 += __max_length;
|
||||
}
|
||||
return target;
|
||||
return __target;
|
||||
}
|
||||
|
||||
/** @brief Merge routine being able to merge only the @c max_length
|
||||
/** @brief Merge routine being able to merge only the @c __max_length
|
||||
* smallest elements.
|
||||
*
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* Specially designed code should allow the compiler to generate
|
||||
* conditional moves instead of branches.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param target Target begin iterator.
|
||||
* @param max_length Maximum number of elements to merge.
|
||||
* @param comp Comparator.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __target Target begin iterator.
|
||||
* @param __max_length Maximum number of elements to merge.
|
||||
* @param __comp Comparator.
|
||||
* @return Output end iterator. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename OutputIterator, typename _DifferenceTp,
|
||||
typename Comparator>
|
||||
OutputIterator
|
||||
merge_advance_movc(RandomAccessIterator1& begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2& begin2,
|
||||
RandomAccessIterator2 end2,
|
||||
OutputIterator target,
|
||||
_DifferenceTp max_length, Comparator comp)
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _OutputIterator, typename _DifferenceTp,
|
||||
typename _Compare>
|
||||
_OutputIterator
|
||||
__merge_advance_movc(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
_RAIter2 __end2,
|
||||
_OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
{
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
typedef typename std::iterator_traits<_RAIter1>::value_type
|
||||
value_type1;
|
||||
typedef typename std::iterator_traits<RandomAccessIterator2>::value_type
|
||||
typedef typename std::iterator_traits<_RAIter2>::value_type
|
||||
value_type2;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(max_length >= 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__max_length >= 0);
|
||||
#endif
|
||||
|
||||
while (begin1 != end1 && begin2 != end2 && max_length > 0)
|
||||
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
|
||||
{
|
||||
RandomAccessIterator1 next1 = begin1 + 1;
|
||||
RandomAccessIterator2 next2 = begin2 + 1;
|
||||
value_type1 element1 = *begin1;
|
||||
value_type2 element2 = *begin2;
|
||||
_RAIter1 __next1 = __begin1 + 1;
|
||||
_RAIter2 __next2 = __begin2 + 1;
|
||||
value_type1 __element1 = *__begin1;
|
||||
value_type2 __element2 = *__begin2;
|
||||
|
||||
if (comp(element2, element1))
|
||||
if (__comp(__element2, __element1))
|
||||
{
|
||||
element1 = element2;
|
||||
begin2 = next2;
|
||||
__element1 = __element2;
|
||||
__begin2 = __next2;
|
||||
}
|
||||
else
|
||||
begin1 = next1;
|
||||
__begin1 = __next1;
|
||||
|
||||
*target = element1;
|
||||
*__target = __element1;
|
||||
|
||||
++target;
|
||||
--max_length;
|
||||
++__target;
|
||||
--__max_length;
|
||||
}
|
||||
if (begin1 != end1)
|
||||
if (__begin1 != __end1)
|
||||
{
|
||||
target = std::copy(begin1, begin1 + max_length, target);
|
||||
begin1 += max_length;
|
||||
__target = std::copy(__begin1, __begin1 + __max_length, __target);
|
||||
__begin1 += __max_length;
|
||||
}
|
||||
else
|
||||
{
|
||||
target = std::copy(begin2, begin2 + max_length, target);
|
||||
begin2 += max_length;
|
||||
__target = std::copy(__begin2, __begin2 + __max_length, __target);
|
||||
__begin2 += __max_length;
|
||||
}
|
||||
return target;
|
||||
return __target;
|
||||
}
|
||||
|
||||
/** @brief Merge routine being able to merge only the @c max_length
|
||||
/** @brief Merge routine being able to merge only the @__c __max_length
|
||||
* smallest elements.
|
||||
*
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* The @__c __begin iterators are advanced accordingly, they might not
|
||||
* reach @__c __end, in contrast to the usual variant.
|
||||
* Static switch on whether to use the conditional-move variant.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param target Target begin iterator.
|
||||
* @param max_length Maximum number of elements to merge.
|
||||
* @param comp Comparator.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __target Target begin iterator.
|
||||
* @param __max_length Maximum number of elements to merge.
|
||||
* @param __comp Comparator.
|
||||
* @return Output end iterator. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename OutputIterator, typename _DifferenceTp,
|
||||
typename Comparator>
|
||||
inline OutputIterator
|
||||
merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2& begin2, RandomAccessIterator2 end2,
|
||||
OutputIterator target, _DifferenceTp max_length,
|
||||
Comparator comp)
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _OutputIterator, typename _DifferenceTp,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter2& __begin2, _RAIter2 __end2,
|
||||
_OutputIterator __target, _DifferenceTp __max_length,
|
||||
_Compare __comp)
|
||||
{
|
||||
_GLIBCXX_CALL(max_length)
|
||||
_GLIBCXX_CALL(__max_length)
|
||||
|
||||
return merge_advance_movc(begin1, end1, begin2, end2, target,
|
||||
max_length, comp);
|
||||
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target,
|
||||
__max_length, __comp);
|
||||
}
|
||||
|
||||
/** @brief Merge routine fallback to sequential in case the
|
||||
iterators of the two input sequences are of different type.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param target Target begin iterator.
|
||||
* @param max_length Maximum number of elements to merge.
|
||||
* @param comp Comparator.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __target Target begin iterator.
|
||||
* @param __max_length Maximum number of elements to merge.
|
||||
* @param __comp Comparator.
|
||||
* @return Output end iterator. */
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename RandomAccessIterator3, typename Comparator>
|
||||
inline RandomAccessIterator3
|
||||
parallel_merge_advance(RandomAccessIterator1& begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2& begin2,
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _RAIter3, typename _Compare>
|
||||
inline _RAIter3
|
||||
__parallel_merge_advance(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
// different iterators, parallel implementation
|
||||
// not available
|
||||
RandomAccessIterator2 end2,
|
||||
RandomAccessIterator3 target, typename
|
||||
std::iterator_traits<RandomAccessIterator1>::
|
||||
difference_type max_length, Comparator comp)
|
||||
{ return merge_advance(begin1, end1, begin2, end2, target,
|
||||
max_length, comp); }
|
||||
_RAIter2 __end2,
|
||||
_RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
{ return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
|
||||
__max_length, __comp); }
|
||||
|
||||
/** @brief Parallel merge routine being able to merge only the @c
|
||||
* max_length smallest elements.
|
||||
/** @brief Parallel merge routine being able to merge only the @__c
|
||||
* __max_length smallest elements.
|
||||
*
|
||||
* The @c begin iterators are advanced accordingly, they might not
|
||||
* reach @c end, in contrast to the usual variant.
|
||||
* The @__c __begin iterators are advanced accordingly, they might not
|
||||
* reach @__c __end, in contrast to the usual variant.
|
||||
* The functionality is projected onto parallel_multiway_merge.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param target Target begin iterator.
|
||||
* @param max_length Maximum number of elements to merge.
|
||||
* @param comp Comparator.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __target Target begin iterator.
|
||||
* @param __max_length Maximum number of elements to merge.
|
||||
* @param __comp Comparator.
|
||||
* @return Output end iterator.
|
||||
*/
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator3,
|
||||
typename Comparator>
|
||||
inline RandomAccessIterator3
|
||||
parallel_merge_advance(RandomAccessIterator1& begin1,
|
||||
RandomAccessIterator1 end1,
|
||||
RandomAccessIterator1& begin2,
|
||||
RandomAccessIterator1 end2,
|
||||
RandomAccessIterator3 target, typename
|
||||
std::iterator_traits<RandomAccessIterator1>::
|
||||
difference_type max_length, Comparator comp)
|
||||
template<typename _RAIter1, typename _RAIter3,
|
||||
typename _Compare>
|
||||
inline _RAIter3
|
||||
__parallel_merge_advance(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter1& __begin2,
|
||||
_RAIter1 __end2,
|
||||
_RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
{
|
||||
typedef typename
|
||||
std::iterator_traits<RandomAccessIterator1>::value_type value_type;
|
||||
typedef typename std::iterator_traits<RandomAccessIterator1>::
|
||||
difference_type difference_type1 /* == difference_type2 */;
|
||||
typedef typename std::iterator_traits<RandomAccessIterator3>::
|
||||
difference_type difference_type3;
|
||||
typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1>
|
||||
iterator_pair;
|
||||
std::iterator_traits<_RAIter1>::value_type _ValueType;
|
||||
typedef typename std::iterator_traits<_RAIter1>::
|
||||
difference_type _DifferenceType1 /* == difference_type2 */;
|
||||
typedef typename std::iterator_traits<_RAIter3>::
|
||||
difference_type _DifferenceType3;
|
||||
typedef typename std::pair<_RAIter1, _RAIter1>
|
||||
_IteratorPair;
|
||||
|
||||
iterator_pair
|
||||
seqs[2] = { std::make_pair(begin1, end1),
|
||||
std::make_pair(begin2, end2) };
|
||||
RandomAccessIterator3
|
||||
target_end = parallel_multiway_merge
|
||||
< /* stable = */ true, /* sentinels = */ false>(
|
||||
seqs, seqs + 2, target,
|
||||
_IteratorPair
|
||||
seqs[2] = { std::make_pair(__begin1, __end1),
|
||||
std::make_pair(__begin2, __end2) };
|
||||
_RAIter3
|
||||
__target_end = parallel_multiway_merge
|
||||
< /* __stable = */ true, /* __sentinels = */ false>(
|
||||
seqs, seqs + 2, __target,
|
||||
multiway_merge_exact_splitting
|
||||
< /* stable = */ true, iterator_pair*,
|
||||
Comparator, difference_type1>,
|
||||
max_length, comp, omp_get_max_threads());
|
||||
< /* __stable = */ true, _IteratorPair*,
|
||||
_Compare, _DifferenceType1>,
|
||||
__max_length, __comp, omp_get_max_threads());
|
||||
|
||||
return target_end;
|
||||
return __target_end;
|
||||
}
|
||||
} //namespace __gnu_parallel
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/multiseq_selection.h
|
||||
* @brief Functions to find elements of a certain global rank in
|
||||
* @brief Functions to find elements of a certain global __rank in
|
||||
* multiple sorted sequences. Also serves for splitting such
|
||||
* sequence sets.
|
||||
*
|
||||
|
|
@ -50,275 +50,275 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Compare a pair of types lexicographically, ascending. */
|
||||
template<typename T1, typename T2, typename Comparator>
|
||||
class lexicographic
|
||||
: public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
|
||||
/** @brief Compare __a pair of types lexicographically, ascending. */
|
||||
template<typename _T1, typename _T2, typename _Compare>
|
||||
class _Lexicographic
|
||||
: public std::binary_function<std::pair<_T1, _T2>, std::pair<_T1, _T2>, bool>
|
||||
{
|
||||
private:
|
||||
Comparator& comp;
|
||||
_Compare& __comp;
|
||||
|
||||
public:
|
||||
lexicographic(Comparator& _comp) : comp(_comp) { }
|
||||
_Lexicographic(_Compare& _comp) : __comp(_comp) { }
|
||||
|
||||
bool
|
||||
operator()(const std::pair<T1, T2>& p1,
|
||||
const std::pair<T1, T2>& p2) const
|
||||
operator()(const std::pair<_T1, _T2>& __p1,
|
||||
const std::pair<_T1, _T2>& __p2) const
|
||||
{
|
||||
if (comp(p1.first, p2.first))
|
||||
if (__comp(__p1.first, __p2.first))
|
||||
return true;
|
||||
|
||||
if (comp(p2.first, p1.first))
|
||||
if (__comp(__p2.first, __p1.first))
|
||||
return false;
|
||||
|
||||
// Firsts are equal.
|
||||
return p1.second < p2.second;
|
||||
return __p1.second < __p2.second;
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Compare a pair of types lexicographically, descending. */
|
||||
template<typename T1, typename T2, typename Comparator>
|
||||
class lexicographic_reverse : public std::binary_function<T1, T2, bool>
|
||||
/** @brief Compare __a pair of types lexicographically, descending. */
|
||||
template<typename _T1, typename _T2, typename _Compare>
|
||||
class _LexicographicReverse : public std::binary_function<_T1, _T2, bool>
|
||||
{
|
||||
private:
|
||||
Comparator& comp;
|
||||
_Compare& __comp;
|
||||
|
||||
public:
|
||||
lexicographic_reverse(Comparator& _comp) : comp(_comp) { }
|
||||
_LexicographicReverse(_Compare& _comp) : __comp(_comp) { }
|
||||
|
||||
bool
|
||||
operator()(const std::pair<T1, T2>& p1,
|
||||
const std::pair<T1, T2>& p2) const
|
||||
operator()(const std::pair<_T1, _T2>& __p1,
|
||||
const std::pair<_T1, _T2>& __p2) const
|
||||
{
|
||||
if (comp(p2.first, p1.first))
|
||||
if (__comp(__p2.first, __p1.first))
|
||||
return true;
|
||||
|
||||
if (comp(p1.first, p2.first))
|
||||
if (__comp(__p1.first, __p2.first))
|
||||
return false;
|
||||
|
||||
// Firsts are equal.
|
||||
return p2.second < p1.second;
|
||||
return __p2.second < __p1.second;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Splits several sorted sequences at a certain global rank,
|
||||
* @brief Splits several sorted sequences at __a certain global __rank,
|
||||
* resulting in a splitting point for each sequence.
|
||||
* The sequences are passed via a sequence of random-access
|
||||
* The sequences are passed via __a __sequence of random-access
|
||||
* iterator pairs, none of the sequences may be empty. If there
|
||||
* are several equal elements across the split, the ones on the
|
||||
* left side will be chosen from sequences with smaller number.
|
||||
* @param begin_seqs Begin of the sequence of iterator pairs.
|
||||
* @param end_seqs End of the sequence of iterator pairs.
|
||||
* @param rank The global rank to partition at.
|
||||
* @param begin_offsets A random-access sequence begin where the
|
||||
* result will be stored in. Each element of the sequence is an
|
||||
* __left side will be chosen from sequences with smaller number.
|
||||
* @param __begin_seqs Begin of the sequence of iterator pairs.
|
||||
* @param __end_seqs End of the sequence of iterator pairs.
|
||||
* @param __rank The global __rank to partition at.
|
||||
* @param __begin_offsets A random-access __sequence __begin where the
|
||||
* __result will be stored in. Each element of the sequence is an
|
||||
* iterator that points to the first element on the greater part of
|
||||
* the respective sequence.
|
||||
* @param comp The ordering functor, defaults to std::less<T>.
|
||||
* the respective __sequence.
|
||||
* @param __comp The ordering functor, defaults to std::less<_Tp>.
|
||||
*/
|
||||
template<typename RanSeqs, typename RankType, typename RankIterator,
|
||||
typename Comparator>
|
||||
template<typename _RanSeqs, typename _RankType, typename _RankIterator,
|
||||
typename _Compare>
|
||||
void
|
||||
multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs,
|
||||
RankType rank,
|
||||
RankIterator begin_offsets,
|
||||
Comparator comp = std::less<
|
||||
multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs,
|
||||
_RankType __rank,
|
||||
_RankIterator __begin_offsets,
|
||||
_Compare __comp = std::less<
|
||||
typename std::iterator_traits<typename
|
||||
std::iterator_traits<RanSeqs>::value_type::
|
||||
first_type>::value_type>()) // std::less<T>
|
||||
std::iterator_traits<_RanSeqs>::value_type::
|
||||
first_type>::value_type>()) // std::less<_Tp>
|
||||
{
|
||||
_GLIBCXX_CALL(end_seqs - begin_seqs)
|
||||
_GLIBCXX_CALL(__end_seqs - __begin_seqs)
|
||||
|
||||
typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
|
||||
It;
|
||||
typedef typename std::iterator_traits<It>::difference_type
|
||||
difference_type;
|
||||
typedef typename std::iterator_traits<It>::value_type value_type;
|
||||
typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
|
||||
_It;
|
||||
typedef typename std::iterator_traits<_It>::difference_type
|
||||
_DifferenceType;
|
||||
typedef typename std::iterator_traits<_It>::value_type _ValueType;
|
||||
|
||||
lexicographic<value_type, int, Comparator> lcomp(comp);
|
||||
lexicographic_reverse<value_type, int, Comparator> lrcomp(comp);
|
||||
_Lexicographic<_ValueType, int, _Compare> __lcomp(__comp);
|
||||
_LexicographicReverse<_ValueType, int, _Compare> __lrcomp(__comp);
|
||||
|
||||
// Number of sequences, number of elements in total (possibly
|
||||
// including padding).
|
||||
difference_type m = std::distance(begin_seqs, end_seqs), N = 0,
|
||||
nmax, n, r;
|
||||
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0,
|
||||
__nmax, __n, __r;
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
|
||||
__N += std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
std::distance(begin_seqs[i].first, begin_seqs[i].second) > 0);
|
||||
std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second) > 0);
|
||||
}
|
||||
|
||||
if (rank == N)
|
||||
if (__rank == __N)
|
||||
{
|
||||
for (int i = 0; i < m; i++)
|
||||
begin_offsets[i] = begin_seqs[i].second; // Very end.
|
||||
// Return m - 1;
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
__begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
|
||||
// Return __m - 1;
|
||||
return;
|
||||
}
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(m != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(N != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(rank >= 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(rank < N);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__m != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__N != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__rank < __N);
|
||||
|
||||
difference_type* ns = new difference_type[m];
|
||||
difference_type* a = new difference_type[m];
|
||||
difference_type* b = new difference_type[m];
|
||||
difference_type l;
|
||||
_DifferenceType* __ns = new _DifferenceType[__m];
|
||||
_DifferenceType* __a = new _DifferenceType[__m];
|
||||
_DifferenceType* __b = new _DifferenceType[__m];
|
||||
_DifferenceType __l;
|
||||
|
||||
ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
|
||||
nmax = ns[0];
|
||||
for (int i = 0; i < m; i++)
|
||||
__ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
|
||||
__nmax = __ns[0];
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
|
||||
nmax = std::max(nmax, ns[i]);
|
||||
__ns[__i] = std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
|
||||
__nmax = std::max(__nmax, __ns[__i]);
|
||||
}
|
||||
|
||||
r = __log2(nmax) + 1;
|
||||
__r = __log2(__nmax) + 1;
|
||||
|
||||
// Pad all lists to this length, at least as long as any ns[i],
|
||||
// equality iff nmax = 2^k - 1.
|
||||
l = (1ULL << r) - 1;
|
||||
// Pad all lists to this length, at least as long as any ns[__i],
|
||||
// equality iff __nmax = 2^__k - 1.
|
||||
__l = (1ULL << __r) - 1;
|
||||
|
||||
// From now on, including padding.
|
||||
N = l * m;
|
||||
__N = __l * __m;
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
a[i] = 0;
|
||||
b[i] = l;
|
||||
__a[__i] = 0;
|
||||
__b[__i] = __l;
|
||||
}
|
||||
n = l / 2;
|
||||
__n = __l / 2;
|
||||
|
||||
// Invariants:
|
||||
// 0 <= a[i] <= ns[i], 0 <= b[i] <= l
|
||||
// 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
|
||||
|
||||
#define S(i) (begin_seqs[i].first)
|
||||
#define __S(__i) (__begin_seqs[__i].first)
|
||||
|
||||
// Initial partition.
|
||||
std::vector<std::pair<value_type, int> > sample;
|
||||
std::vector<std::pair<_ValueType, int> > __sample;
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
if (n < ns[i]) //sequence long enough
|
||||
sample.push_back(std::make_pair(S(i)[n], i));
|
||||
__gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
if (__n < __ns[__i]) //__sequence long enough
|
||||
__sample.push_back(std::make_pair(__S(__i)[__n], __i));
|
||||
__gnu_sequential::sort(__sample.begin(), __sample.end(), __lcomp);
|
||||
|
||||
for (int i = 0; i < m; i++) //conceptual infinity
|
||||
if (n >= ns[i]) //sequence too short, conceptual infinity
|
||||
sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
|
||||
for (int __i = 0; __i < __m; __i++) //conceptual infinity
|
||||
if (__n >= __ns[__i]) //__sequence too short, conceptual infinity
|
||||
__sample.push_back(std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
|
||||
|
||||
difference_type localrank = rank * m / N ;
|
||||
_DifferenceType localrank = __rank * __m / __N ;
|
||||
|
||||
int j;
|
||||
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
|
||||
a[sample[j].second] += n + 1;
|
||||
for (; j < m; j++)
|
||||
b[sample[j].second] -= n + 1;
|
||||
int __j;
|
||||
for (__j = 0; __j < localrank && ((__n + 1) <= __ns[__sample[__j].second]); ++__j)
|
||||
__a[__sample[__j].second] += __n + 1;
|
||||
for (; __j < __m; __j++)
|
||||
__b[__sample[__j].second] -= __n + 1;
|
||||
|
||||
// Further refinement.
|
||||
while (n > 0)
|
||||
while (__n > 0)
|
||||
{
|
||||
n /= 2;
|
||||
__n /= 2;
|
||||
|
||||
int lmax_seq = -1; // to avoid warning
|
||||
const value_type* lmax = NULL; // impossible to avoid the warning?
|
||||
for (int i = 0; i < m; i++)
|
||||
int __lmax_seq = -1; // to avoid warning
|
||||
const _ValueType* __lmax = NULL; // impossible to avoid the warning?
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
if (a[i] > 0)
|
||||
if (__a[__i] > 0)
|
||||
{
|
||||
if (!lmax)
|
||||
if (!__lmax)
|
||||
{
|
||||
lmax = &(S(i)[a[i] - 1]);
|
||||
lmax_seq = i;
|
||||
__lmax = &(__S(__i)[__a[__i] - 1]);
|
||||
__lmax_seq = __i;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Max, favor rear sequences.
|
||||
if (!comp(S(i)[a[i] - 1], *lmax))
|
||||
if (!__comp(__S(__i)[__a[__i] - 1], *__lmax))
|
||||
{
|
||||
lmax = &(S(i)[a[i] - 1]);
|
||||
lmax_seq = i;
|
||||
__lmax = &(__S(__i)[__a[__i] - 1]);
|
||||
__lmax_seq = __i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < m; i++)
|
||||
int __i;
|
||||
for (__i = 0; __i < __m; __i++)
|
||||
{
|
||||
difference_type middle = (b[i] + a[i]) / 2;
|
||||
if (lmax && middle < ns[i] &&
|
||||
lcomp(std::make_pair(S(i)[middle], i),
|
||||
std::make_pair(*lmax, lmax_seq)))
|
||||
a[i] = std::min(a[i] + n + 1, ns[i]);
|
||||
_DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
|
||||
if (__lmax && __middle < __ns[__i] &&
|
||||
__lcomp(std::make_pair(__S(__i)[__middle], __i),
|
||||
std::make_pair(*__lmax, __lmax_seq)))
|
||||
__a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
|
||||
else
|
||||
b[i] -= n + 1;
|
||||
__b[__i] -= __n + 1;
|
||||
}
|
||||
|
||||
difference_type leftsize = 0, total = 0;
|
||||
for (int i = 0; i < m; i++)
|
||||
_DifferenceType __leftsize = 0, __total = 0;
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
leftsize += a[i] / (n + 1);
|
||||
total += l / (n + 1);
|
||||
__leftsize += __a[__i] / (__n + 1);
|
||||
__total += __l / (__n + 1);
|
||||
}
|
||||
|
||||
difference_type skew = static_cast<difference_type>
|
||||
(static_cast<uint64>(total) * rank / N - leftsize);
|
||||
_DifferenceType __skew = static_cast<_DifferenceType>
|
||||
(static_cast<uint64>(__total) * __rank / __N - __leftsize);
|
||||
|
||||
if (skew > 0)
|
||||
if (__skew > 0)
|
||||
{
|
||||
// Move to the left, find smallest.
|
||||
std::priority_queue<std::pair<value_type, int>,
|
||||
std::vector<std::pair<value_type, int> >,
|
||||
lexicographic_reverse<value_type, int, Comparator> >
|
||||
pq(lrcomp);
|
||||
std::priority_queue<std::pair<_ValueType, int>,
|
||||
std::vector<std::pair<_ValueType, int> >,
|
||||
_LexicographicReverse<_ValueType, int, _Compare> >
|
||||
__pq(__lrcomp);
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
if (b[i] < ns[i])
|
||||
pq.push(std::make_pair(S(i)[b[i]], i));
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
if (__b[__i] < __ns[__i])
|
||||
__pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
|
||||
|
||||
for (; skew != 0 && !pq.empty(); --skew)
|
||||
for (; __skew != 0 && !__pq.empty(); --__skew)
|
||||
{
|
||||
int source = pq.top().second;
|
||||
pq.pop();
|
||||
int source = __pq.top().second;
|
||||
__pq.pop();
|
||||
|
||||
a[source] = std::min(a[source] + n + 1, ns[source]);
|
||||
b[source] += n + 1;
|
||||
__a[source] = std::min(__a[source] + __n + 1, __ns[source]);
|
||||
__b[source] += __n + 1;
|
||||
|
||||
if (b[source] < ns[source])
|
||||
pq.push(std::make_pair(S(source)[b[source]], source));
|
||||
if (__b[source] < __ns[source])
|
||||
__pq.push(std::make_pair(__S(source)[__b[source]], source));
|
||||
}
|
||||
}
|
||||
else if (skew < 0)
|
||||
else if (__skew < 0)
|
||||
{
|
||||
// Move to the right, find greatest.
|
||||
std::priority_queue<std::pair<value_type, int>,
|
||||
std::vector<std::pair<value_type, int> >,
|
||||
lexicographic<value_type, int, Comparator> > pq(lcomp);
|
||||
std::priority_queue<std::pair<_ValueType, int>,
|
||||
std::vector<std::pair<_ValueType, int> >,
|
||||
_Lexicographic<_ValueType, int, _Compare> > __pq(__lcomp);
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
if (a[i] > 0)
|
||||
pq.push(std::make_pair(S(i)[a[i] - 1], i));
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
if (__a[__i] > 0)
|
||||
__pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
|
||||
|
||||
for (; skew != 0; ++skew)
|
||||
for (; __skew != 0; ++__skew)
|
||||
{
|
||||
int source = pq.top().second;
|
||||
pq.pop();
|
||||
int source = __pq.top().second;
|
||||
__pq.pop();
|
||||
|
||||
a[source] -= n + 1;
|
||||
b[source] -= n + 1;
|
||||
__a[source] -= __n + 1;
|
||||
__b[source] -= __n + 1;
|
||||
|
||||
if (a[source] > 0)
|
||||
pq.push(std::make_pair(S(source)[a[source] - 1], source));
|
||||
if (__a[source] > 0)
|
||||
__pq.push(std::make_pair(__S(source)[__a[source] - 1], source));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Postconditions:
|
||||
// a[i] == b[i] in most cases, except when a[i] has been clamped
|
||||
// __a[__i] == __b[__i] in most cases, except when __a[__i] has been clamped
|
||||
// because of having reached the boundary
|
||||
|
||||
// Now return the result, calculate the offset.
|
||||
|
|
@ -326,236 +326,236 @@ namespace __gnu_parallel
|
|||
// Compare the keys on both edges of the border.
|
||||
|
||||
// Maximum of left edge, minimum of right edge.
|
||||
value_type* maxleft = NULL;
|
||||
value_type* minright = NULL;
|
||||
for (int i = 0; i < m; i++)
|
||||
_ValueType* __maxleft = NULL;
|
||||
_ValueType* __minright = NULL;
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
if (a[i] > 0)
|
||||
if (__a[__i] > 0)
|
||||
{
|
||||
if (!maxleft)
|
||||
maxleft = &(S(i)[a[i] - 1]);
|
||||
if (!__maxleft)
|
||||
__maxleft = &(__S(__i)[__a[__i] - 1]);
|
||||
else
|
||||
{
|
||||
// Max, favor rear sequences.
|
||||
if (!comp(S(i)[a[i] - 1], *maxleft))
|
||||
maxleft = &(S(i)[a[i] - 1]);
|
||||
if (!__comp(__S(__i)[__a[__i] - 1], *__maxleft))
|
||||
__maxleft = &(__S(__i)[__a[__i] - 1]);
|
||||
}
|
||||
}
|
||||
if (b[i] < ns[i])
|
||||
if (__b[__i] < __ns[__i])
|
||||
{
|
||||
if (!minright)
|
||||
minright = &(S(i)[b[i]]);
|
||||
if (!__minright)
|
||||
__minright = &(__S(__i)[__b[__i]]);
|
||||
else
|
||||
{
|
||||
// Min, favor fore sequences.
|
||||
if (comp(S(i)[b[i]], *minright))
|
||||
minright = &(S(i)[b[i]]);
|
||||
if (__comp(__S(__i)[__b[__i]], *__minright))
|
||||
__minright = &(__S(__i)[__b[__i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int seq = 0;
|
||||
for (int i = 0; i < m; i++)
|
||||
begin_offsets[i] = S(i) + a[i];
|
||||
int __seq = 0;
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
__begin_offsets[__i] = __S(__i) + __a[__i];
|
||||
|
||||
delete[] ns;
|
||||
delete[] a;
|
||||
delete[] b;
|
||||
delete[] __ns;
|
||||
delete[] __a;
|
||||
delete[] __b;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Selects the element at a certain global rank from several
|
||||
* @brief Selects the element at __a certain global __rank from several
|
||||
* sorted sequences.
|
||||
*
|
||||
* The sequences are passed via a sequence of random-access
|
||||
* The sequences are passed via __a __sequence of random-access
|
||||
* iterator pairs, none of the sequences may be empty.
|
||||
* @param begin_seqs Begin of the sequence of iterator pairs.
|
||||
* @param end_seqs End of the sequence of iterator pairs.
|
||||
* @param rank The global rank to partition at.
|
||||
* @param offset The rank of the selected element in the global
|
||||
* @param __begin_seqs Begin of the sequence of iterator pairs.
|
||||
* @param __end_seqs End of the sequence of iterator pairs.
|
||||
* @param __rank The global __rank to partition at.
|
||||
* @param __offset The rank of the selected element in the global
|
||||
* subsequence of elements equal to the selected element. If the
|
||||
* selected element is unique, this number is 0.
|
||||
* @param comp The ordering functor, defaults to std::less.
|
||||
* @param __comp The ordering functor, defaults to std::less.
|
||||
*/
|
||||
template<typename T, typename RanSeqs, typename RankType,
|
||||
typename Comparator>
|
||||
T
|
||||
multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
|
||||
RankType& offset, Comparator comp = std::less<T>())
|
||||
template<typename _Tp, typename _RanSeqs, typename _RankType,
|
||||
typename _Compare>
|
||||
_Tp
|
||||
multiseq_selection(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, _RankType __rank,
|
||||
_RankType& __offset, _Compare __comp = std::less<_Tp>())
|
||||
{
|
||||
_GLIBCXX_CALL(end_seqs - begin_seqs)
|
||||
_GLIBCXX_CALL(__end_seqs - __begin_seqs)
|
||||
|
||||
typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
|
||||
It;
|
||||
typedef typename std::iterator_traits<It>::difference_type
|
||||
difference_type;
|
||||
typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type
|
||||
_It;
|
||||
typedef typename std::iterator_traits<_It>::difference_type
|
||||
_DifferenceType;
|
||||
|
||||
lexicographic<T, int, Comparator> lcomp(comp);
|
||||
lexicographic_reverse<T, int, Comparator> lrcomp(comp);
|
||||
_Lexicographic<_Tp, int, _Compare> __lcomp(__comp);
|
||||
_LexicographicReverse<_Tp, int, _Compare> __lrcomp(__comp);
|
||||
|
||||
// Number of sequences, number of elements in total (possibly
|
||||
// including padding).
|
||||
difference_type m = std::distance(begin_seqs, end_seqs);
|
||||
difference_type N = 0;
|
||||
difference_type nmax, n, r;
|
||||
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
|
||||
_DifferenceType __N = 0;
|
||||
_DifferenceType __nmax, __n, __r;
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
__N += std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
|
||||
|
||||
if (m == 0 || N == 0 || rank < 0 || rank >= N)
|
||||
if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N)
|
||||
{
|
||||
// Result undefined when there is no data or rank is outside bounds.
|
||||
// Result undefined when there is no data or __rank is outside bounds.
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
|
||||
difference_type* ns = new difference_type[m];
|
||||
difference_type* a = new difference_type[m];
|
||||
difference_type* b = new difference_type[m];
|
||||
difference_type l;
|
||||
_DifferenceType* __ns = new _DifferenceType[__m];
|
||||
_DifferenceType* __a = new _DifferenceType[__m];
|
||||
_DifferenceType* __b = new _DifferenceType[__m];
|
||||
_DifferenceType __l;
|
||||
|
||||
ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
|
||||
nmax = ns[0];
|
||||
for (int i = 0; i < m; ++i)
|
||||
__ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second);
|
||||
__nmax = __ns[0];
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
|
||||
nmax = std::max(nmax, ns[i]);
|
||||
__ns[__i] = std::distance(__begin_seqs[__i].first, __begin_seqs[__i].second);
|
||||
__nmax = std::max(__nmax, __ns[__i]);
|
||||
}
|
||||
|
||||
r = __log2(nmax) + 1;
|
||||
__r = __log2(__nmax) + 1;
|
||||
|
||||
// Pad all lists to this length, at least as long as any ns[i],
|
||||
// equality iff nmax = 2^k - 1
|
||||
l = pow2(r) - 1;
|
||||
// Pad all lists to this length, at least as long as any __ns[__i],
|
||||
// equality iff __nmax = 2^k - 1
|
||||
__l = pow2(__r) - 1;
|
||||
|
||||
// From now on, including padding.
|
||||
N = l * m;
|
||||
__N = __l * __m;
|
||||
|
||||
for (int i = 0; i < m; ++i)
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
a[i] = 0;
|
||||
b[i] = l;
|
||||
__a[__i] = 0;
|
||||
__b[__i] = __l;
|
||||
}
|
||||
n = l / 2;
|
||||
__n = __l / 2;
|
||||
|
||||
// Invariants:
|
||||
// 0 <= a[i] <= ns[i], 0 <= b[i] <= l
|
||||
// 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l
|
||||
|
||||
#define S(i) (begin_seqs[i].first)
|
||||
#define __S(__i) (__begin_seqs[__i].first)
|
||||
|
||||
// Initial partition.
|
||||
std::vector<std::pair<T, int> > sample;
|
||||
std::vector<std::pair<_Tp, int> > __sample;
|
||||
|
||||
for (int i = 0; i < m; i++)
|
||||
if (n < ns[i])
|
||||
sample.push_back(std::make_pair(S(i)[n], i));
|
||||
__gnu_sequential::sort(sample.begin(), sample.end(),
|
||||
lcomp, sequential_tag());
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
if (__n < __ns[__i])
|
||||
__sample.push_back(std::make_pair(__S(__i)[__n], __i));
|
||||
__gnu_sequential::sort(__sample.begin(), __sample.end(),
|
||||
__lcomp, sequential_tag());
|
||||
|
||||
// Conceptual infinity.
|
||||
for (int i = 0; i < m; i++)
|
||||
if (n >= ns[i])
|
||||
sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
if (__n >= __ns[__i])
|
||||
__sample.push_back(std::make_pair(__S(__i)[0] /*__dummy element*/, __i));
|
||||
|
||||
difference_type localrank = rank * m / N ;
|
||||
_DifferenceType localrank = __rank * __m / __N ;
|
||||
|
||||
int j;
|
||||
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
|
||||
a[sample[j].second] += n + 1;
|
||||
for (; j < m; ++j)
|
||||
b[sample[j].second] -= n + 1;
|
||||
int __j;
|
||||
for (__j = 0; __j < localrank && ((__n + 1) <= __ns[__sample[__j].second]); ++__j)
|
||||
__a[__sample[__j].second] += __n + 1;
|
||||
for (; __j < __m; ++__j)
|
||||
__b[__sample[__j].second] -= __n + 1;
|
||||
|
||||
// Further refinement.
|
||||
while (n > 0)
|
||||
while (__n > 0)
|
||||
{
|
||||
n /= 2;
|
||||
__n /= 2;
|
||||
|
||||
const T* lmax = NULL;
|
||||
for (int i = 0; i < m; ++i)
|
||||
const _Tp* __lmax = NULL;
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
if (a[i] > 0)
|
||||
if (__a[__i] > 0)
|
||||
{
|
||||
if (!lmax)
|
||||
lmax = &(S(i)[a[i] - 1]);
|
||||
if (!__lmax)
|
||||
__lmax = &(__S(__i)[__a[__i] - 1]);
|
||||
else
|
||||
{
|
||||
if (comp(*lmax, S(i)[a[i] - 1])) //max
|
||||
lmax = &(S(i)[a[i] - 1]);
|
||||
if (__comp(*__lmax, __S(__i)[__a[__i] - 1])) //max
|
||||
__lmax = &(__S(__i)[__a[__i] - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < m; i++)
|
||||
int __i;
|
||||
for (__i = 0; __i < __m; __i++)
|
||||
{
|
||||
difference_type middle = (b[i] + a[i]) / 2;
|
||||
if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
|
||||
a[i] = std::min(a[i] + n + 1, ns[i]);
|
||||
_DifferenceType __middle = (__b[__i] + __a[__i]) / 2;
|
||||
if (__lmax && __middle < __ns[__i] && __comp(__S(__i)[__middle], *__lmax))
|
||||
__a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]);
|
||||
else
|
||||
b[i] -= n + 1;
|
||||
__b[__i] -= __n + 1;
|
||||
}
|
||||
|
||||
difference_type leftsize = 0, total = 0;
|
||||
for (int i = 0; i < m; ++i)
|
||||
_DifferenceType __leftsize = 0, __total = 0;
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
leftsize += a[i] / (n + 1);
|
||||
total += l / (n + 1);
|
||||
__leftsize += __a[__i] / (__n + 1);
|
||||
__total += __l / (__n + 1);
|
||||
}
|
||||
|
||||
difference_type skew = ((unsigned long long)total * rank / N
|
||||
- leftsize);
|
||||
_DifferenceType __skew = ((unsigned long long)__total * __rank / __N
|
||||
- __leftsize);
|
||||
|
||||
if (skew > 0)
|
||||
if (__skew > 0)
|
||||
{
|
||||
// Move to the left, find smallest.
|
||||
std::priority_queue<std::pair<T, int>,
|
||||
std::vector<std::pair<T, int> >,
|
||||
lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
|
||||
std::priority_queue<std::pair<_Tp, int>,
|
||||
std::vector<std::pair<_Tp, int> >,
|
||||
_LexicographicReverse<_Tp, int, _Compare> > __pq(__lrcomp);
|
||||
|
||||
for (int i = 0; i < m; ++i)
|
||||
if (b[i] < ns[i])
|
||||
pq.push(std::make_pair(S(i)[b[i]], i));
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
if (__b[__i] < __ns[__i])
|
||||
__pq.push(std::make_pair(__S(__i)[__b[__i]], __i));
|
||||
|
||||
for (; skew != 0 && !pq.empty(); --skew)
|
||||
for (; __skew != 0 && !__pq.empty(); --__skew)
|
||||
{
|
||||
int source = pq.top().second;
|
||||
pq.pop();
|
||||
int source = __pq.top().second;
|
||||
__pq.pop();
|
||||
|
||||
a[source] = std::min(a[source] + n + 1, ns[source]);
|
||||
b[source] += n + 1;
|
||||
__a[source] = std::min(__a[source] + __n + 1, __ns[source]);
|
||||
__b[source] += __n + 1;
|
||||
|
||||
if (b[source] < ns[source])
|
||||
pq.push(std::make_pair(S(source)[b[source]], source));
|
||||
if (__b[source] < __ns[source])
|
||||
__pq.push(std::make_pair(__S(source)[__b[source]], source));
|
||||
}
|
||||
}
|
||||
else if (skew < 0)
|
||||
else if (__skew < 0)
|
||||
{
|
||||
// Move to the right, find greatest.
|
||||
std::priority_queue<std::pair<T, int>,
|
||||
std::vector<std::pair<T, int> >,
|
||||
lexicographic<T, int, Comparator> > pq(lcomp);
|
||||
std::priority_queue<std::pair<_Tp, int>,
|
||||
std::vector<std::pair<_Tp, int> >,
|
||||
_Lexicographic<_Tp, int, _Compare> > __pq(__lcomp);
|
||||
|
||||
for (int i = 0; i < m; ++i)
|
||||
if (a[i] > 0)
|
||||
pq.push(std::make_pair(S(i)[a[i] - 1], i));
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
if (__a[__i] > 0)
|
||||
__pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i));
|
||||
|
||||
for (; skew != 0; ++skew)
|
||||
for (; __skew != 0; ++__skew)
|
||||
{
|
||||
int source = pq.top().second;
|
||||
pq.pop();
|
||||
int source = __pq.top().second;
|
||||
__pq.pop();
|
||||
|
||||
a[source] -= n + 1;
|
||||
b[source] -= n + 1;
|
||||
__a[source] -= __n + 1;
|
||||
__b[source] -= __n + 1;
|
||||
|
||||
if (a[source] > 0)
|
||||
pq.push(std::make_pair(S(source)[a[source] - 1], source));
|
||||
if (__a[source] > 0)
|
||||
__pq.push(std::make_pair(__S(source)[__a[source] - 1], source));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Postconditions:
|
||||
// a[i] == b[i] in most cases, except when a[i] has been clamped
|
||||
// __a[__i] == __b[__i] in most cases, except when __a[__i] has been clamped
|
||||
// because of having reached the boundary
|
||||
|
||||
// Now return the result, calculate the offset.
|
||||
|
|
@ -563,71 +563,71 @@ namespace __gnu_parallel
|
|||
// Compare the keys on both edges of the border.
|
||||
|
||||
// Maximum of left edge, minimum of right edge.
|
||||
bool maxleftset = false, minrightset = false;
|
||||
bool __maxleftset = false, __minrightset = false;
|
||||
|
||||
// Impossible to avoid the warning?
|
||||
T maxleft, minright;
|
||||
for (int i = 0; i < m; ++i)
|
||||
_Tp __maxleft, __minright;
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
if (a[i] > 0)
|
||||
if (__a[__i] > 0)
|
||||
{
|
||||
if (!maxleftset)
|
||||
if (!__maxleftset)
|
||||
{
|
||||
maxleft = S(i)[a[i] - 1];
|
||||
maxleftset = true;
|
||||
__maxleft = __S(__i)[__a[__i] - 1];
|
||||
__maxleftset = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Max.
|
||||
if (comp(maxleft, S(i)[a[i] - 1]))
|
||||
maxleft = S(i)[a[i] - 1];
|
||||
if (__comp(__maxleft, __S(__i)[__a[__i] - 1]))
|
||||
__maxleft = __S(__i)[__a[__i] - 1];
|
||||
}
|
||||
}
|
||||
if (b[i] < ns[i])
|
||||
if (__b[__i] < __ns[__i])
|
||||
{
|
||||
if (!minrightset)
|
||||
if (!__minrightset)
|
||||
{
|
||||
minright = S(i)[b[i]];
|
||||
minrightset = true;
|
||||
__minright = __S(__i)[__b[__i]];
|
||||
__minrightset = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Min.
|
||||
if (comp(S(i)[b[i]], minright))
|
||||
minright = S(i)[b[i]];
|
||||
if (__comp(__S(__i)[__b[__i]], __minright))
|
||||
__minright = __S(__i)[__b[__i]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Minright is the splitter, in any case.
|
||||
|
||||
if (!maxleftset || comp(minright, maxleft))
|
||||
if (!__maxleftset || __comp(__minright, __maxleft))
|
||||
{
|
||||
// Good luck, everything is split unambiguously.
|
||||
offset = 0;
|
||||
__offset = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// We have to calculate an offset.
|
||||
offset = 0;
|
||||
__offset = 0;
|
||||
|
||||
for (int i = 0; i < m; ++i)
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
difference_type lb = std::lower_bound(S(i), S(i) + ns[i],
|
||||
minright,
|
||||
comp) - S(i);
|
||||
offset += a[i] - lb;
|
||||
_DifferenceType lb = std::lower_bound(__S(__i), __S(__i) + __ns[__i],
|
||||
__minright,
|
||||
__comp) - __S(__i);
|
||||
__offset += __a[__i] - lb;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] ns;
|
||||
delete[] a;
|
||||
delete[] b;
|
||||
delete[] __ns;
|
||||
delete[] __a;
|
||||
delete[] __b;
|
||||
|
||||
return minright;
|
||||
return __minright;
|
||||
}
|
||||
}
|
||||
|
||||
#undef S
|
||||
#undef __S
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -44,431 +44,431 @@ namespace __gnu_parallel
|
|||
|
||||
/** @brief Subsequence description. */
|
||||
template<typename _DifferenceTp>
|
||||
struct Piece
|
||||
struct _Piece
|
||||
{
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
/** @brief Begin of subsequence. */
|
||||
difference_type begin;
|
||||
_DifferenceType __begin;
|
||||
|
||||
/** @brief End of subsequence. */
|
||||
difference_type end;
|
||||
_DifferenceType __end;
|
||||
};
|
||||
|
||||
/** @brief Data accessed by all threads.
|
||||
*
|
||||
* PMWMS = parallel multiway mergesort */
|
||||
template<typename RandomAccessIterator>
|
||||
struct PMWMSSortingData
|
||||
template<typename _RAIter>
|
||||
struct _PMWMSSortingData
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
/** @brief Number of threads involved. */
|
||||
thread_index_t num_threads;
|
||||
_ThreadIndex __num_threads;
|
||||
|
||||
/** @brief Input begin. */
|
||||
RandomAccessIterator source;
|
||||
/** @brief Input begin. */
|
||||
_RAIter _M_source;
|
||||
|
||||
/** @brief Start indices, per thread. */
|
||||
difference_type* starts;
|
||||
_DifferenceType* _M_starts;
|
||||
|
||||
/** @brief Storage in which to sort. */
|
||||
value_type** temporary;
|
||||
_ValueType** _M_temporary;
|
||||
|
||||
/** @brief Samples. */
|
||||
value_type* samples;
|
||||
_ValueType* _M_samples;
|
||||
|
||||
/** @brief Offsets to add to the found positions. */
|
||||
difference_type* offsets;
|
||||
_DifferenceType* _M_offsets;
|
||||
|
||||
/** @brief Pieces of data to merge @c [thread][sequence] */
|
||||
std::vector<Piece<difference_type> >* pieces;
|
||||
/** @brief Pieces of data to merge @c [thread][sequence] */
|
||||
std::vector<_Piece<_DifferenceType> >* _M_pieces;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Select samples from a sequence.
|
||||
* @param sd Pointer to algorithm data. Result will be placed in
|
||||
* @c sd->samples.
|
||||
* @param num_samples Number of samples to select.
|
||||
* @brief Select samples from a sequence.
|
||||
* @param __sd Pointer to algorithm data. Result will be placed in
|
||||
* @c __sd->_M_samples.
|
||||
* @param __num_samples Number of samples to select.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename _DifferenceTp>
|
||||
template<typename _RAIter, typename _DifferenceTp>
|
||||
void
|
||||
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
|
||||
_DifferenceTp num_samples)
|
||||
__determine_samples(_PMWMSSortingData<_RAIter>* __sd,
|
||||
_DifferenceTp __num_samples)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
difference_type* es = new difference_type[num_samples + 2];
|
||||
_DifferenceType* __es = new _DifferenceType[__num_samples + 2];
|
||||
|
||||
equally_split(sd->starts[iam + 1] - sd->starts[iam],
|
||||
num_samples + 1, es);
|
||||
equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
|
||||
__num_samples + 1, __es);
|
||||
|
||||
for (difference_type i = 0; i < num_samples; ++i)
|
||||
::new(&(sd->samples[iam * num_samples + i]))
|
||||
value_type(sd->source[sd->starts[iam] + es[i + 1]]);
|
||||
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
|
||||
::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
|
||||
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
|
||||
|
||||
delete[] es;
|
||||
delete[] __es;
|
||||
}
|
||||
|
||||
/** @brief Split consistently. */
|
||||
template<bool exact, typename RandomAccessIterator,
|
||||
typename Comparator, typename SortingPlacesIterator>
|
||||
struct split_consistently
|
||||
template<bool __exact, typename _RAIter,
|
||||
typename _Compare, typename _SortingPlacesIterator>
|
||||
struct _SplitConsistently
|
||||
{
|
||||
};
|
||||
|
||||
/** @brief Split by exact splitting. */
|
||||
template<typename RandomAccessIterator, typename Comparator,
|
||||
typename SortingPlacesIterator>
|
||||
struct split_consistently
|
||||
<true, RandomAccessIterator, Comparator, SortingPlacesIterator>
|
||||
template<typename _RAIter, typename _Compare,
|
||||
typename _SortingPlacesIterator>
|
||||
struct _SplitConsistently
|
||||
<true, _RAIter, _Compare, _SortingPlacesIterator>
|
||||
{
|
||||
void operator()(
|
||||
const thread_index_t iam,
|
||||
PMWMSSortingData<RandomAccessIterator>* sd,
|
||||
Comparator& comp,
|
||||
const _ThreadIndex __iam,
|
||||
_PMWMSSortingData<_RAIter>* __sd,
|
||||
_Compare& __comp,
|
||||
const typename
|
||||
std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
num_samples)
|
||||
std::iterator_traits<_RAIter>::difference_type
|
||||
__num_samples)
|
||||
const
|
||||
{
|
||||
# pragma omp barrier
|
||||
|
||||
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
|
||||
seqs(sd->num_threads);
|
||||
for (thread_index_t s = 0; s < sd->num_threads; s++)
|
||||
seqs[s] = std::make_pair(sd->temporary[s],
|
||||
sd->temporary[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]));
|
||||
std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
|
||||
seqs(__sd->__num_threads);
|
||||
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; __s++)
|
||||
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
|
||||
__sd->_M_temporary[__s]
|
||||
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]));
|
||||
|
||||
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
|
||||
std::vector<_SortingPlacesIterator> _M_offsets(__sd->__num_threads);
|
||||
|
||||
// if not last thread
|
||||
if (iam < sd->num_threads - 1)
|
||||
if (__iam < __sd->__num_threads - 1)
|
||||
multiseq_partition(seqs.begin(), seqs.end(),
|
||||
sd->starts[iam + 1], offsets.begin(), comp);
|
||||
__sd->_M_starts[__iam + 1], _M_offsets.begin(), __comp);
|
||||
|
||||
for (int seq = 0; seq < sd->num_threads; seq++)
|
||||
for (int __seq = 0; __seq < __sd->__num_threads; __seq++)
|
||||
{
|
||||
// for each sequence
|
||||
if (iam < (sd->num_threads - 1))
|
||||
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
|
||||
if (__iam < (__sd->__num_threads - 1))
|
||||
__sd->_M_pieces[__iam][__seq].__end = _M_offsets[__seq] - seqs[__seq].first;
|
||||
else
|
||||
// very end of this sequence
|
||||
sd->pieces[iam][seq].end =
|
||||
sd->starts[seq + 1] - sd->starts[seq];
|
||||
__sd->_M_pieces[__iam][__seq].__end =
|
||||
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
|
||||
for (_ThreadIndex __seq = 0; __seq < __sd->__num_threads; __seq++)
|
||||
{
|
||||
// For each sequence.
|
||||
if (iam > 0)
|
||||
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
|
||||
if (__iam > 0)
|
||||
__sd->_M_pieces[__iam][__seq].__begin = __sd->_M_pieces[__iam - 1][__seq].__end;
|
||||
else
|
||||
// Absolute beginning.
|
||||
sd->pieces[iam][seq].begin = 0;
|
||||
__sd->_M_pieces[__iam][__seq].__begin = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Split by sampling. */
|
||||
template<typename RandomAccessIterator, typename Comparator,
|
||||
typename SortingPlacesIterator>
|
||||
struct split_consistently<false, RandomAccessIterator, Comparator,
|
||||
SortingPlacesIterator>
|
||||
template<typename _RAIter, typename _Compare,
|
||||
typename _SortingPlacesIterator>
|
||||
struct _SplitConsistently<false, _RAIter, _Compare,
|
||||
_SortingPlacesIterator>
|
||||
{
|
||||
void operator()(
|
||||
const thread_index_t iam,
|
||||
PMWMSSortingData<RandomAccessIterator>* sd,
|
||||
Comparator& comp,
|
||||
const _ThreadIndex __iam,
|
||||
_PMWMSSortingData<_RAIter>* __sd,
|
||||
_Compare& __comp,
|
||||
const typename
|
||||
std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
num_samples)
|
||||
std::iterator_traits<_RAIter>::difference_type
|
||||
__num_samples)
|
||||
const
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
determine_samples(sd, num_samples);
|
||||
__determine_samples(__sd, __num_samples);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
__gnu_sequential::sort(sd->samples,
|
||||
sd->samples + (num_samples * sd->num_threads),
|
||||
comp);
|
||||
__gnu_sequential::sort(__sd->_M_samples,
|
||||
__sd->_M_samples + (__num_samples * __sd->__num_threads),
|
||||
__comp);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
for (thread_index_t s = 0; s < sd->num_threads; ++s)
|
||||
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; ++__s)
|
||||
{
|
||||
// For each sequence.
|
||||
if (num_samples * iam > 0)
|
||||
sd->pieces[iam][s].begin =
|
||||
std::lower_bound(sd->temporary[s],
|
||||
sd->temporary[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]),
|
||||
sd->samples[num_samples * iam],
|
||||
comp)
|
||||
- sd->temporary[s];
|
||||
if (__num_samples * __iam > 0)
|
||||
__sd->_M_pieces[__iam][__s].__begin =
|
||||
std::lower_bound(__sd->_M_temporary[__s],
|
||||
__sd->_M_temporary[__s]
|
||||
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
|
||||
__sd->_M_samples[__num_samples * __iam],
|
||||
__comp)
|
||||
- __sd->_M_temporary[__s];
|
||||
else
|
||||
// Absolute beginning.
|
||||
sd->pieces[iam][s].begin = 0;
|
||||
__sd->_M_pieces[__iam][__s].__begin = 0;
|
||||
|
||||
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
|
||||
sd->pieces[iam][s].end =
|
||||
std::lower_bound(sd->temporary[s],
|
||||
sd->temporary[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]),
|
||||
sd->samples[num_samples * (iam + 1)],
|
||||
comp)
|
||||
- sd->temporary[s];
|
||||
if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->__num_threads))
|
||||
__sd->_M_pieces[__iam][__s].__end =
|
||||
std::lower_bound(__sd->_M_temporary[__s],
|
||||
__sd->_M_temporary[__s]
|
||||
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
|
||||
__sd->_M_samples[__num_samples * (__iam + 1)],
|
||||
__comp)
|
||||
- __sd->_M_temporary[__s];
|
||||
else
|
||||
// Absolute end.
|
||||
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
|
||||
// Absolute end.
|
||||
__sd->_M_pieces[__iam][__s].__end = __sd->_M_starts[__s + 1] - __sd->_M_starts[__s];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
struct possibly_stable_sort
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
struct __possibly_stable_sort
|
||||
{
|
||||
};
|
||||
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
struct __possibly_stable_sort<true, _RAIter, _Compare>
|
||||
{
|
||||
void operator()(const RandomAccessIterator& begin,
|
||||
const RandomAccessIterator& end, Comparator& comp) const
|
||||
void operator()(const _RAIter& __begin,
|
||||
const _RAIter& __end, _Compare& __comp) const
|
||||
{
|
||||
__gnu_sequential::stable_sort(begin, end, comp);
|
||||
__gnu_sequential::stable_sort(__begin, __end, __comp);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
struct __possibly_stable_sort<false, _RAIter, _Compare>
|
||||
{
|
||||
void operator()(const RandomAccessIterator begin,
|
||||
const RandomAccessIterator end, Comparator& comp) const
|
||||
void operator()(const _RAIter __begin,
|
||||
const _RAIter __end, _Compare& __comp) const
|
||||
{
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool stable, typename SeqRandomAccessIterator,
|
||||
typename RandomAccessIterator, typename Comparator,
|
||||
template<bool __stable, typename Seq_RAIter,
|
||||
typename _RAIter, typename _Compare,
|
||||
typename DiffType>
|
||||
struct possibly_stable_multiway_merge
|
||||
struct __possibly_stable_multiway_merge
|
||||
{
|
||||
};
|
||||
|
||||
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
|
||||
typename Comparator, typename DiffType>
|
||||
struct possibly_stable_multiway_merge
|
||||
<true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
|
||||
template<typename Seq_RAIter, typename _RAIter,
|
||||
typename _Compare, typename DiffType>
|
||||
struct __possibly_stable_multiway_merge
|
||||
<true, Seq_RAIter, _RAIter, _Compare,
|
||||
DiffType>
|
||||
{
|
||||
void operator()(const SeqRandomAccessIterator& seqs_begin,
|
||||
const SeqRandomAccessIterator& seqs_end,
|
||||
const RandomAccessIterator& target,
|
||||
Comparator& comp,
|
||||
DiffType length_am) const
|
||||
void operator()(const Seq_RAIter& __seqs_begin,
|
||||
const Seq_RAIter& __seqs_end,
|
||||
const _RAIter& __target,
|
||||
_Compare& __comp,
|
||||
DiffType __length_am) const
|
||||
{
|
||||
stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
|
||||
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
|
||||
sequential_tag());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
|
||||
typename Comparator, typename DiffType>
|
||||
struct possibly_stable_multiway_merge
|
||||
<false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
|
||||
template<typename Seq_RAIter, typename _RAIter,
|
||||
typename _Compare, typename DiffType>
|
||||
struct __possibly_stable_multiway_merge
|
||||
<false, Seq_RAIter, _RAIter, _Compare,
|
||||
DiffType>
|
||||
{
|
||||
void operator()(const SeqRandomAccessIterator& seqs_begin,
|
||||
const SeqRandomAccessIterator& seqs_end,
|
||||
const RandomAccessIterator& target,
|
||||
Comparator& comp,
|
||||
DiffType length_am) const
|
||||
void operator()(const Seq_RAIter& __seqs_begin,
|
||||
const Seq_RAIter& __seqs_end,
|
||||
const _RAIter& __target,
|
||||
_Compare& __comp,
|
||||
DiffType __length_am) const
|
||||
{
|
||||
multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
|
||||
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
|
||||
sequential_tag());
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief PMWMS code executed by each thread.
|
||||
* @param sd Pointer to algorithm data.
|
||||
* @param comp Comparator.
|
||||
* @param __sd Pointer to algorithm data.
|
||||
* @param __comp Comparator.
|
||||
*/
|
||||
template<bool stable, bool exact, typename RandomAccessIterator,
|
||||
typename Comparator>
|
||||
template<bool __stable, bool __exact, typename _RAIter,
|
||||
typename _Compare>
|
||||
void
|
||||
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
|
||||
Comparator& comp)
|
||||
parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
|
||||
_Compare& __comp)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// Length of this thread's chunk, before merging.
|
||||
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
|
||||
_DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
|
||||
|
||||
// Sort in temporary storage, leave space for sentinel.
|
||||
|
||||
typedef value_type* SortingPlacesIterator;
|
||||
typedef _ValueType* _SortingPlacesIterator;
|
||||
|
||||
sd->temporary[iam] =
|
||||
static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * (length_local + 1)));
|
||||
__sd->_M_temporary[__iam] =
|
||||
static_cast<_ValueType*>(
|
||||
::operator new(sizeof(_ValueType) * (__length_local + 1)));
|
||||
|
||||
// Copy there.
|
||||
std::uninitialized_copy(sd->source + sd->starts[iam],
|
||||
sd->source + sd->starts[iam] + length_local,
|
||||
sd->temporary[iam]);
|
||||
std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
|
||||
__sd->_M_source + __sd->_M_starts[__iam] + __length_local,
|
||||
__sd->_M_temporary[__iam]);
|
||||
|
||||
possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
|
||||
(sd->temporary[iam], sd->temporary[iam] + length_local, comp);
|
||||
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
|
||||
(__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp);
|
||||
|
||||
// Invariant: locally sorted subsequence in sd->temporary[iam],
|
||||
// sd->temporary[iam] + length_local.
|
||||
// Invariant: locally sorted subsequence in __sd->_M_temporary[__iam],
|
||||
// __sd->_M_temporary[__iam] + __length_local.
|
||||
|
||||
// No barrier here: Synchronization is done by the splitting routine.
|
||||
|
||||
difference_type num_samples =
|
||||
_Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
|
||||
split_consistently
|
||||
<exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
|
||||
(iam, sd, comp, num_samples);
|
||||
_DifferenceType __num_samples =
|
||||
_Settings::get().sort_mwms_oversampling * __sd->__num_threads - 1;
|
||||
_SplitConsistently
|
||||
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
|
||||
(__iam, __sd, __comp, __num_samples);
|
||||
|
||||
// Offset from target begin, length after merging.
|
||||
difference_type offset = 0, length_am = 0;
|
||||
for (thread_index_t s = 0; s < sd->num_threads; s++)
|
||||
// Offset from target begin, length after merging.
|
||||
_DifferenceType __offset = 0, __length_am = 0;
|
||||
for (_ThreadIndex __s = 0; __s < __sd->__num_threads; __s++)
|
||||
{
|
||||
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
|
||||
offset += sd->pieces[iam][s].begin;
|
||||
__length_am += __sd->_M_pieces[__iam][__s].__end - __sd->_M_pieces[__iam][__s].__begin;
|
||||
__offset += __sd->_M_pieces[__iam][__s].__begin;
|
||||
}
|
||||
|
||||
typedef std::vector<
|
||||
std::pair<SortingPlacesIterator, SortingPlacesIterator> >
|
||||
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
|
||||
seq_vector_type;
|
||||
seq_vector_type seqs(sd->num_threads);
|
||||
seq_vector_type seqs(__sd->__num_threads);
|
||||
|
||||
for (int s = 0; s < sd->num_threads; ++s)
|
||||
for (int __s = 0; __s < __sd->__num_threads; ++__s)
|
||||
{
|
||||
seqs[s] =
|
||||
std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
|
||||
sd->temporary[s] + sd->pieces[iam][s].end);
|
||||
seqs[__s] =
|
||||
std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s].__begin,
|
||||
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s].__end);
|
||||
}
|
||||
|
||||
possibly_stable_multiway_merge<
|
||||
stable,
|
||||
__possibly_stable_multiway_merge<
|
||||
__stable,
|
||||
typename seq_vector_type::iterator,
|
||||
RandomAccessIterator,
|
||||
Comparator, difference_type>()
|
||||
_RAIter,
|
||||
_Compare, _DifferenceType>()
|
||||
(seqs.begin(), seqs.end(),
|
||||
sd->source + offset, comp,
|
||||
length_am);
|
||||
__sd->_M_source + __offset, __comp,
|
||||
__length_am);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
::operator delete(sd->temporary[iam]);
|
||||
::operator delete(__sd->_M_temporary[__iam]);
|
||||
}
|
||||
|
||||
/** @brief PMWMS main call.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param comp Comparator.
|
||||
* @param n Length of sequence.
|
||||
* @param num_threads Number of threads to use.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __n Length of sequence.
|
||||
* @param __num_threads Number of threads to use.
|
||||
*/
|
||||
template<bool stable, bool exact, typename RandomAccessIterator,
|
||||
typename Comparator>
|
||||
template<bool __stable, bool __exact, typename _RAIter,
|
||||
typename _Compare>
|
||||
void
|
||||
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
thread_index_t num_threads)
|
||||
parallel_sort_mwms(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (n <= 1)
|
||||
if (__n <= 1)
|
||||
return;
|
||||
|
||||
// at least one element per thread
|
||||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
// shared variables
|
||||
PMWMSSortingData<RandomAccessIterator> sd;
|
||||
difference_type* starts;
|
||||
_PMWMSSortingData<_RAIter> __sd;
|
||||
_DifferenceType* _M_starts;
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
num_threads = omp_get_num_threads(); //no more threads than requested
|
||||
__num_threads = omp_get_num_threads(); //no more threads than requested
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
sd.num_threads = num_threads;
|
||||
sd.source = begin;
|
||||
__sd.__num_threads = __num_threads;
|
||||
__sd._M_source = __begin;
|
||||
|
||||
sd.temporary = new value_type*[num_threads];
|
||||
__sd._M_temporary = new _ValueType*[__num_threads];
|
||||
|
||||
if (!exact)
|
||||
if (!__exact)
|
||||
{
|
||||
difference_type size =
|
||||
(_Settings::get().sort_mwms_oversampling * num_threads - 1)
|
||||
* num_threads;
|
||||
sd.samples = static_cast<value_type*>(
|
||||
::operator new(size * sizeof(value_type)));
|
||||
_DifferenceType size =
|
||||
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
|
||||
* __num_threads;
|
||||
__sd._M_samples = static_cast<_ValueType*>(
|
||||
::operator new(size * sizeof(_ValueType)));
|
||||
}
|
||||
else
|
||||
sd.samples = NULL;
|
||||
__sd._M_samples = NULL;
|
||||
|
||||
sd.offsets = new difference_type[num_threads - 1];
|
||||
sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
|
||||
for (int s = 0; s < num_threads; ++s)
|
||||
sd.pieces[s].resize(num_threads);
|
||||
starts = sd.starts = new difference_type[num_threads + 1];
|
||||
__sd._M_offsets = new _DifferenceType[__num_threads - 1];
|
||||
__sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads];
|
||||
for (int __s = 0; __s < __num_threads; ++__s)
|
||||
__sd._M_pieces[__s].resize(__num_threads);
|
||||
_M_starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
|
||||
|
||||
difference_type chunk_length = n / num_threads;
|
||||
difference_type split = n % num_threads;
|
||||
difference_type pos = 0;
|
||||
for (int i = 0; i < num_threads; ++i)
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __split = __n % __num_threads;
|
||||
_DifferenceType __pos = 0;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
starts[i] = pos;
|
||||
pos += (i < split) ? (chunk_length + 1) : chunk_length;
|
||||
_M_starts[__i] = __pos;
|
||||
__pos += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
|
||||
}
|
||||
starts[num_threads] = pos;
|
||||
_M_starts[__num_threads] = __pos;
|
||||
} //single
|
||||
|
||||
// Now sort in parallel.
|
||||
parallel_sort_mwms_pu<stable, exact>(&sd, comp);
|
||||
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
|
||||
} //parallel
|
||||
|
||||
delete[] starts;
|
||||
delete[] sd.temporary;
|
||||
delete[] _M_starts;
|
||||
delete[] __sd._M_temporary;
|
||||
|
||||
if (!exact)
|
||||
::operator delete(sd.samples);
|
||||
if (!__exact)
|
||||
::operator delete(__sd._M_samples);
|
||||
|
||||
delete[] sd.offsets;
|
||||
delete[] sd.pieces;
|
||||
delete[] __sd._M_offsets;
|
||||
delete[] __sd._M_pieces;
|
||||
}
|
||||
} //namespace __gnu_parallel
|
||||
|
||||
|
|
|
|||
|
|
@ -51,448 +51,448 @@ namespace std
|
|||
namespace __parallel
|
||||
{
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator, typename T>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
template<typename _IIter, typename _Tp>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::accumulate(begin, end, init); }
|
||||
{ return _GLIBCXX_STD_P::accumulate(__begin, __end, __init); }
|
||||
|
||||
template<typename InputIterator, typename T, typename BinaryOperation>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
BinaryOperation binary_op, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::accumulate(begin, end, init, binary_op); }
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOperation>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init,
|
||||
_BinaryOperation __binary_op, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::accumulate(__begin, __end, __init, __binary_op); }
|
||||
|
||||
// Sequential fallback for input iterator case.
|
||||
template<typename InputIterator, typename T, typename IteratorTag>
|
||||
inline T
|
||||
accumulate_switch(InputIterator begin, InputIterator end,
|
||||
T init, IteratorTag)
|
||||
{ return accumulate(begin, end, init, __gnu_parallel::sequential_tag()); }
|
||||
template<typename _IIter, typename _Tp, typename _IteratorTag>
|
||||
inline _Tp
|
||||
__accumulate_switch(_IIter __begin, _IIter __end,
|
||||
_Tp __init, _IteratorTag)
|
||||
{ return accumulate(__begin, __end, __init, __gnu_parallel::sequential_tag()); }
|
||||
|
||||
template<typename InputIterator, typename T, typename BinaryOperation,
|
||||
typename IteratorTag>
|
||||
inline T
|
||||
accumulate_switch(InputIterator begin, InputIterator end, T init,
|
||||
BinaryOperation binary_op, IteratorTag)
|
||||
{ return accumulate(begin, end, init, binary_op,
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOperation,
|
||||
typename _IteratorTag>
|
||||
inline _Tp
|
||||
__accumulate_switch(_IIter __begin, _IIter __end, _Tp __init,
|
||||
_BinaryOperation __binary_op, _IteratorTag)
|
||||
{ return accumulate(__begin, __end, __init, __binary_op,
|
||||
__gnu_parallel::sequential_tag()); }
|
||||
|
||||
// Parallel algorithm for random access iterators.
|
||||
template<typename _RandomAccessIterator, typename T,
|
||||
typename BinaryOperation>
|
||||
T
|
||||
accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end,
|
||||
T init, BinaryOperation binary_op,
|
||||
template<typename __RAIter, typename _Tp,
|
||||
typename _BinaryOperation>
|
||||
_Tp
|
||||
__accumulate_switch(__RAIter __begin, __RAIter __end,
|
||||
_Tp __init, _BinaryOperation __binary_op,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism __parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
|
||||
>= __gnu_parallel::_Settings::get().accumulate_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
&& __gnu_parallel::__is_parallel(__parallelism_tag)))
|
||||
{
|
||||
T res = init;
|
||||
__gnu_parallel::accumulate_selector<_RandomAccessIterator>
|
||||
my_selector;
|
||||
_Tp __res = __init;
|
||||
__gnu_parallel::__accumulate_selector<__RAIter>
|
||||
__my_selector;
|
||||
__gnu_parallel::
|
||||
for_each_template_random_access_ed(begin, end,
|
||||
__gnu_parallel::nothing(),
|
||||
my_selector,
|
||||
for_each_template_random_access_ed(__begin, __end,
|
||||
__gnu_parallel::_Nothing(),
|
||||
__my_selector,
|
||||
__gnu_parallel::
|
||||
accumulate_binop_reduct
|
||||
<BinaryOperation>(binary_op),
|
||||
res, res, -1);
|
||||
return res;
|
||||
__accumulate_binop_reduct
|
||||
<_BinaryOperation>(__binary_op),
|
||||
__res, __res, -1);
|
||||
return __res;
|
||||
}
|
||||
else
|
||||
return accumulate(begin, end, init, binary_op,
|
||||
return accumulate(__begin, __end, __init, __binary_op,
|
||||
__gnu_parallel::sequential_tag());
|
||||
}
|
||||
|
||||
// Public interface.
|
||||
template<typename InputIterator, typename T>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
template<typename _IIter, typename _Tp>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::value_type value_type;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
typedef std::iterator_traits<_IIter> _IteratorTraits;
|
||||
typedef typename _IteratorTraits::value_type _ValueType;
|
||||
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
|
||||
|
||||
return accumulate_switch(begin, end, init,
|
||||
__gnu_parallel::plus<T, value_type>(),
|
||||
iterator_category(), parallelism_tag);
|
||||
return __accumulate_switch(__begin, __end, __init,
|
||||
__gnu_parallel::_Plus<_Tp, _ValueType>(),
|
||||
_IteratorCategory(), __parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename T>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init)
|
||||
template<typename _IIter, typename _Tp>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::value_type value_type;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
typedef std::iterator_traits<_IIter> _IteratorTraits;
|
||||
typedef typename _IteratorTraits::value_type _ValueType;
|
||||
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
|
||||
|
||||
return accumulate_switch(begin, end, init,
|
||||
__gnu_parallel::plus<T, value_type>(),
|
||||
iterator_category());
|
||||
return __accumulate_switch(__begin, __end, __init,
|
||||
__gnu_parallel::_Plus<_Tp, _ValueType>(),
|
||||
_IteratorCategory());
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename T, typename BinaryOperation>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
BinaryOperation binary_op,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOperation>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init,
|
||||
_BinaryOperation __binary_op,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
return accumulate_switch(begin, end, init, binary_op,
|
||||
iterator_category(), parallelism_tag);
|
||||
typedef iterator_traits<_IIter> _IteratorTraits;
|
||||
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
|
||||
return __accumulate_switch(__begin, __end, __init, __binary_op,
|
||||
_IteratorCategory(), __parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename T, typename BinaryOperation>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
BinaryOperation binary_op)
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOperation>
|
||||
inline _Tp
|
||||
accumulate(_IIter __begin, _IIter __end, _Tp __init,
|
||||
_BinaryOperation __binary_op)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
return accumulate_switch(begin, end, init, binary_op,
|
||||
iterator_category());
|
||||
typedef iterator_traits<_IIter> _IteratorTraits;
|
||||
typedef typename _IteratorTraits::iterator_category _IteratorCategory;
|
||||
return __accumulate_switch(__begin, __end, __init, __binary_op,
|
||||
_IteratorCategory());
|
||||
}
|
||||
|
||||
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator1, typename InputIterator2, typename T>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init,
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init); }
|
||||
{ return _GLIBCXX_STD_P::inner_product(__first1, __last1, __first2, __init); }
|
||||
|
||||
template<typename InputIterator1, typename InputIterator2, typename T,
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init,
|
||||
binary_op1, binary_op2); }
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
|
||||
BinaryFunction2 __binary_op2, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::inner_product(__first1, __last1, __first2, __init,
|
||||
__binary_op1, __binary_op2); }
|
||||
|
||||
// Parallel algorithm for random access iterators.
|
||||
template<typename RandomAccessIterator1, typename RandomAccessIterator2,
|
||||
typename T, typename BinaryFunction1, typename BinaryFunction2>
|
||||
T
|
||||
inner_product_switch(RandomAccessIterator1 first1,
|
||||
RandomAccessIterator1 last1,
|
||||
RandomAccessIterator2 first2, T init,
|
||||
BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2,
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Tp, typename BinaryFunction1, typename BinaryFunction2>
|
||||
_Tp
|
||||
__inner_product_switch(_RAIter1 __first1,
|
||||
_RAIter1 __last1,
|
||||
_RAIter2 __first2, _Tp __init,
|
||||
BinaryFunction1 __binary_op1,
|
||||
BinaryFunction2 __binary_op2,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism __parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1)
|
||||
if (_GLIBCXX_PARALLEL_CONDITION((__last1 - __first1)
|
||||
>= __gnu_parallel::_Settings::get().
|
||||
accumulate_minimal_n
|
||||
&& __gnu_parallel::
|
||||
is_parallel(parallelism_tag)))
|
||||
__is_parallel(__parallelism_tag)))
|
||||
{
|
||||
T res = init;
|
||||
_Tp __res = __init;
|
||||
__gnu_parallel::
|
||||
inner_product_selector<RandomAccessIterator1,
|
||||
RandomAccessIterator2, T> my_selector(first1, first2);
|
||||
__inner_product_selector<_RAIter1,
|
||||
_RAIter2, _Tp> __my_selector(__first1, __first2);
|
||||
__gnu_parallel::
|
||||
for_each_template_random_access_ed(first1, last1, binary_op2,
|
||||
my_selector, binary_op1,
|
||||
res, res, -1);
|
||||
return res;
|
||||
for_each_template_random_access_ed(__first1, __last1, __binary_op2,
|
||||
__my_selector, __binary_op1,
|
||||
__res, __res, -1);
|
||||
return __res;
|
||||
}
|
||||
else
|
||||
return inner_product(first1, last1, first2, init,
|
||||
return inner_product(__first1, __last1, __first2, __init,
|
||||
__gnu_parallel::sequential_tag());
|
||||
}
|
||||
|
||||
// No parallelism for input iterators.
|
||||
template<typename InputIterator1, typename InputIterator2, typename T,
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2,
|
||||
typename IteratorTag1, typename IteratorTag2>
|
||||
inline T
|
||||
inner_product_switch(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init,
|
||||
BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2,
|
||||
IteratorTag1, IteratorTag2)
|
||||
{ return inner_product(first1, last1, first2, init,
|
||||
binary_op1, binary_op2,
|
||||
typename _IteratorTag1, typename _IteratorTag2>
|
||||
inline _Tp
|
||||
__inner_product_switch(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init,
|
||||
BinaryFunction1 __binary_op1,
|
||||
BinaryFunction2 __binary_op2,
|
||||
_IteratorTag1, _IteratorTag2)
|
||||
{ return inner_product(__first1, __last1, __first2, __init,
|
||||
__binary_op1, __binary_op2,
|
||||
__gnu_parallel::sequential_tag()); }
|
||||
|
||||
template<typename InputIterator1, typename InputIterator2, typename T,
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
|
||||
BinaryFunction2 __binary_op2,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits1_type;
|
||||
typedef typename traits1_type::iterator_category iterator1_category;
|
||||
typedef iterator_traits<_IIter1> _TraitsType1;
|
||||
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
|
||||
|
||||
typedef iterator_traits<InputIterator2> traits2_type;
|
||||
typedef typename traits2_type::iterator_category iterator2_category;
|
||||
typedef iterator_traits<_IIter2> _TraitsType2;
|
||||
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
|
||||
|
||||
return inner_product_switch(first1, last1, first2, init, binary_op1,
|
||||
binary_op2, iterator1_category(),
|
||||
iterator2_category(), parallelism_tag);
|
||||
return __inner_product_switch(__first1, __last1, __first2, __init, __binary_op1,
|
||||
__binary_op2, _IteratorCategory1(),
|
||||
_IteratorCategory2(), __parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator1, typename InputIterator2, typename T,
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2)
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init, BinaryFunction1 __binary_op1,
|
||||
BinaryFunction2 __binary_op2)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits1_type;
|
||||
typedef typename traits1_type::iterator_category iterator1_category;
|
||||
typedef iterator_traits<_IIter1> _TraitsType1;
|
||||
typedef typename _TraitsType1::iterator_category _IteratorCategory1;
|
||||
|
||||
typedef iterator_traits<InputIterator2> traits2_type;
|
||||
typedef typename traits2_type::iterator_category iterator2_category;
|
||||
typedef iterator_traits<_IIter2> _TraitsType2;
|
||||
typedef typename _TraitsType2::iterator_category _IteratorCategory2;
|
||||
|
||||
return inner_product_switch(first1, last1, first2, init, binary_op1,
|
||||
binary_op2, iterator1_category(),
|
||||
iterator2_category());
|
||||
return __inner_product_switch(__first1, __last1, __first2, __init, __binary_op1,
|
||||
__binary_op2, _IteratorCategory1(),
|
||||
_IteratorCategory2());
|
||||
}
|
||||
|
||||
template<typename InputIterator1, typename InputIterator2, typename T>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits_type1;
|
||||
typedef iterator_traits<_IIter1> traits_type1;
|
||||
typedef typename traits_type1::value_type value_type1;
|
||||
typedef iterator_traits<InputIterator2> traits_type2;
|
||||
typedef iterator_traits<_IIter2> traits_type2;
|
||||
typedef typename traits_type2::value_type value_type2;
|
||||
|
||||
typedef typename
|
||||
__gnu_parallel::multiplies<value_type1, value_type2>::result
|
||||
multiplies_result_type;
|
||||
return inner_product(first1, last1, first2, init,
|
||||
__gnu_parallel::plus<T, multiplies_result_type>(),
|
||||
__gnu_parallel::_Multiplies<value_type1, value_type2>::__result
|
||||
_MultipliesResultType;
|
||||
return inner_product(__first1, __last1, __first2, __init,
|
||||
__gnu_parallel::_Plus<_Tp, _MultipliesResultType>(),
|
||||
__gnu_parallel::
|
||||
multiplies<value_type1, value_type2>(),
|
||||
parallelism_tag);
|
||||
_Multiplies<value_type1, value_type2>(),
|
||||
__parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator1, typename InputIterator2, typename T>
|
||||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init)
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
inline _Tp
|
||||
inner_product(_IIter1 __first1, _IIter1 __last1,
|
||||
_IIter2 __first2, _Tp __init)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits_type1;
|
||||
typedef iterator_traits<_IIter1> traits_type1;
|
||||
typedef typename traits_type1::value_type value_type1;
|
||||
typedef iterator_traits<InputIterator2> traits_type2;
|
||||
typedef iterator_traits<_IIter2> traits_type2;
|
||||
typedef typename traits_type2::value_type value_type2;
|
||||
|
||||
typedef typename
|
||||
__gnu_parallel::multiplies<value_type1, value_type2>::result
|
||||
multiplies_result_type;
|
||||
return inner_product(first1, last1, first2, init,
|
||||
__gnu_parallel::plus<T, multiplies_result_type>(),
|
||||
__gnu_parallel::_Multiplies<value_type1, value_type2>::__result
|
||||
_MultipliesResultType;
|
||||
return inner_product(__first1, __last1, __first2, __init,
|
||||
__gnu_parallel::_Plus<_Tp, _MultipliesResultType>(),
|
||||
__gnu_parallel::
|
||||
multiplies<value_type1, value_type2>());
|
||||
_Multiplies<value_type1, value_type2>());
|
||||
}
|
||||
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
inline OutputIterator
|
||||
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
inline _OutputIterator
|
||||
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result); }
|
||||
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result); }
|
||||
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
inline OutputIterator
|
||||
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
|
||||
BinaryOperation bin_op, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); }
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
inline _OutputIterator
|
||||
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
|
||||
_BinaryOperation __bin_op, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result, __bin_op); }
|
||||
|
||||
// Sequential fallback for input iterator case.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation, typename IteratorTag1,
|
||||
typename IteratorTag2>
|
||||
inline OutputIterator
|
||||
partial_sum_switch(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
IteratorTag1, IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); }
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation, typename _IteratorTag1,
|
||||
typename _IteratorTag2>
|
||||
inline _OutputIterator
|
||||
__partial_sum_switch(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
_IteratorTag1, _IteratorTag2)
|
||||
{ return _GLIBCXX_STD_P::partial_sum(__begin, __end, __result, __bin_op); }
|
||||
|
||||
// Parallel algorithm for random access iterators.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
OutputIterator
|
||||
partial_sum_switch(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
_OutputIterator
|
||||
__partial_sum_switch(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
random_access_iterator_tag, random_access_iterator_tag)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
|
||||
>= __gnu_parallel::_Settings::get().partial_sum_minimal_n))
|
||||
return __gnu_parallel::parallel_partial_sum(begin, end,
|
||||
result, bin_op);
|
||||
return __gnu_parallel::__parallel_partial_sum(__begin, __end,
|
||||
__result, __bin_op);
|
||||
else
|
||||
return partial_sum(begin, end, result, bin_op,
|
||||
return partial_sum(__begin, __end, __result, __bin_op,
|
||||
__gnu_parallel::sequential_tag());
|
||||
}
|
||||
|
||||
// Public interface.
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
inline OutputIterator
|
||||
partial_sum(InputIterator begin, InputIterator end, OutputIterator result)
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
inline _OutputIterator
|
||||
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result)
|
||||
{
|
||||
typedef typename iterator_traits<InputIterator>::value_type value_type;
|
||||
return partial_sum(begin, end, result, std::plus<value_type>());
|
||||
typedef typename iterator_traits<_IIter>::value_type _ValueType;
|
||||
return partial_sum(__begin, __end, __result, std::plus<_ValueType>());
|
||||
}
|
||||
|
||||
// Public interface
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
inline OutputIterator
|
||||
partial_sum(InputIterator begin, InputIterator end, OutputIterator result,
|
||||
BinaryOperation binary_op)
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
inline _OutputIterator
|
||||
partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result,
|
||||
_BinaryOperation __binary_op)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category iteratori_category;
|
||||
typedef iterator_traits<_IIter> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category _IIteratorCategory;
|
||||
|
||||
typedef iterator_traits<OutputIterator> traitso_type;
|
||||
typedef typename traitso_type::iterator_category iteratoro_category;
|
||||
typedef iterator_traits<_OutputIterator> _OTraitsType;
|
||||
typedef typename _OTraitsType::iterator_category _OIterCategory;
|
||||
|
||||
return partial_sum_switch(begin, end, result, binary_op,
|
||||
iteratori_category(), iteratoro_category());
|
||||
return __partial_sum_switch(__begin, __end, __result, __binary_op,
|
||||
_IIteratorCategory(), _OIterCategory());
|
||||
}
|
||||
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::adjacent_difference(begin, end, result); }
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, __gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::adjacent_difference(__begin, __end, __result); }
|
||||
|
||||
// Sequential fallback.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
__gnu_parallel::sequential_tag)
|
||||
{ return _GLIBCXX_STD_P::adjacent_difference(begin, end, result, bin_op); }
|
||||
{ return _GLIBCXX_STD_P::adjacent_difference(__begin, __end, __result, __bin_op); }
|
||||
|
||||
// Sequential fallback for input iterator case.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation, typename IteratorTag1,
|
||||
typename IteratorTag2>
|
||||
inline OutputIterator
|
||||
adjacent_difference_switch(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
IteratorTag1, IteratorTag2)
|
||||
{ return adjacent_difference(begin, end, result, bin_op,
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation, typename _IteratorTag1,
|
||||
typename _IteratorTag2>
|
||||
inline _OutputIterator
|
||||
__adjacent_difference_switch(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
_IteratorTag1, _IteratorTag2)
|
||||
{ return adjacent_difference(__begin, __end, __result, __bin_op,
|
||||
__gnu_parallel::sequential_tag()); }
|
||||
|
||||
// Parallel algorithm for random access iterators.
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
OutputIterator
|
||||
adjacent_difference_switch(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
_OutputIterator
|
||||
__adjacent_difference_switch(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism __parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin)
|
||||
>= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
&& __gnu_parallel::__is_parallel(__parallelism_tag)))
|
||||
{
|
||||
bool dummy = true;
|
||||
typedef __gnu_parallel::iterator_pair<InputIterator, OutputIterator,
|
||||
random_access_iterator_tag> ip;
|
||||
*result = *begin;
|
||||
ip begin_pair(begin + 1, result + 1),
|
||||
end_pair(end, result + (end - begin));
|
||||
__gnu_parallel::adjacent_difference_selector<ip> functionality;
|
||||
bool __dummy = true;
|
||||
typedef __gnu_parallel::_IteratorPair<_IIter, _OutputIterator,
|
||||
random_access_iterator_tag> _ItTrip;
|
||||
*__result = *__begin;
|
||||
_ItTrip begin_pair(__begin + 1, __result + 1),
|
||||
end_pair(__end, __result + (__end - __begin));
|
||||
__gnu_parallel::__adjacent_difference_selector<_ItTrip> __functionality;
|
||||
__gnu_parallel::
|
||||
for_each_template_random_access_ed(begin_pair, end_pair, bin_op,
|
||||
functionality,
|
||||
__gnu_parallel::dummy_reduct(),
|
||||
dummy, dummy, -1);
|
||||
return functionality.finish_iterator;
|
||||
for_each_template_random_access_ed(begin_pair, end_pair, __bin_op,
|
||||
__functionality,
|
||||
__gnu_parallel::_DummyReduct(),
|
||||
__dummy, __dummy, -1);
|
||||
return __functionality.finish_iterator;
|
||||
}
|
||||
else
|
||||
return adjacent_difference(begin, end, result, bin_op,
|
||||
return adjacent_difference(__begin, __end, __result, __bin_op,
|
||||
__gnu_parallel::sequential_tag());
|
||||
}
|
||||
|
||||
// Public interface.
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
return adjacent_difference(begin, end, result, std::minus<value_type>(),
|
||||
parallelism_tag);
|
||||
typedef iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
return adjacent_difference(__begin, __end, __result, std::minus<_ValueType>(),
|
||||
__parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result)
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
return adjacent_difference(begin, end, result, std::minus<value_type>());
|
||||
typedef iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
return adjacent_difference(__begin, __end, __result, std::minus<_ValueType>());
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation binary_op,
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __binary_op,
|
||||
__gnu_parallel::_Parallelism __parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category iteratori_category;
|
||||
typedef iterator_traits<_IIter> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category _IIteratorCategory;
|
||||
|
||||
typedef iterator_traits<OutputIterator> traitso_type;
|
||||
typedef typename traitso_type::iterator_category iteratoro_category;
|
||||
typedef iterator_traits<_OutputIterator> _OTraitsType;
|
||||
typedef typename _OTraitsType::iterator_category _OIterCategory;
|
||||
|
||||
return adjacent_difference_switch(begin, end, result, binary_op,
|
||||
iteratori_category(),
|
||||
iteratoro_category(), parallelism_tag);
|
||||
return __adjacent_difference_switch(__begin, __end, __result, __binary_op,
|
||||
_IIteratorCategory(),
|
||||
_OIterCategory(), __parallelism_tag);
|
||||
}
|
||||
|
||||
template<typename InputIterator, typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation binary_op)
|
||||
template<typename _IIter, typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
inline _OutputIterator
|
||||
adjacent_difference(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __binary_op)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category iteratori_category;
|
||||
typedef iterator_traits<_IIter> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category _IIteratorCategory;
|
||||
|
||||
typedef iterator_traits<OutputIterator> traitso_type;
|
||||
typedef typename traitso_type::iterator_category iteratoro_category;
|
||||
typedef iterator_traits<_OutputIterator> _OTraitsType;
|
||||
typedef typename _OTraitsType::iterator_category _OIterCategory;
|
||||
|
||||
return adjacent_difference_switch(begin, end, result, binary_op,
|
||||
iteratori_category(),
|
||||
iteratoro_category());
|
||||
return __adjacent_difference_switch(__begin, __end, __result, __binary_op,
|
||||
_IIteratorCategory(),
|
||||
_OIterCategory());
|
||||
}
|
||||
} // end namespace
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Tp, typename _Tag>
|
||||
_Tp
|
||||
accumulate_switch(_IIter, _IIter, _Tp, _Tag);
|
||||
__accumulate_switch(_IIter, _IIter, _Tp, _Tag);
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOper>
|
||||
_Tp
|
||||
|
|
@ -71,13 +71,13 @@ namespace __parallel
|
|||
template<typename _IIter, typename _Tp, typename _BinaryOper,
|
||||
typename _Tag>
|
||||
_Tp
|
||||
accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
|
||||
__accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
|
||||
|
||||
template<typename _RAIter, typename _Tp, typename _BinaryOper>
|
||||
_Tp
|
||||
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
|
||||
__accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_unbalanced);
|
||||
|
||||
template<typename _IIter, typename _OIter>
|
||||
|
|
@ -111,15 +111,15 @@ namespace __parallel
|
|||
template<typename _IIter, typename _OIter, typename _BinaryOper,
|
||||
typename _Tag1, typename _Tag2>
|
||||
_OIter
|
||||
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
_Tag1, _Tag2);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename _BinaryOper>
|
||||
_OIter
|
||||
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism parallelism
|
||||
__gnu_parallel::_Parallelism __parallelism
|
||||
= __gnu_parallel::parallel_unbalanced);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
|
|
@ -157,7 +157,7 @@ namespace __parallel
|
|||
template<typename _RAIter1, typename _RAIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
_Tp
|
||||
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
|
||||
__inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
|
||||
BinaryFunction2, random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::_Parallelism
|
||||
|
|
@ -167,7 +167,7 @@ namespace __parallel
|
|||
typename _BinaryFunction1, typename _BinaryFunction2,
|
||||
typename _Tag1, typename _Tag2>
|
||||
_Tp
|
||||
inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
|
||||
__inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
|
||||
_BinaryFunction2, _Tag1, _Tag2);
|
||||
|
||||
|
||||
|
|
@ -182,7 +182,7 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _OIter>
|
||||
_OIter
|
||||
partial_sum(_IIter, _IIter, _OIter result);
|
||||
partial_sum(_IIter, _IIter, _OIter __result);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename _BinaryOper>
|
||||
_OIter
|
||||
|
|
@ -191,11 +191,11 @@ namespace __parallel
|
|||
template<typename _IIter, typename _OIter, typename _BinaryOper,
|
||||
typename _Tag1, typename _Tag2>
|
||||
_OIter
|
||||
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
|
||||
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename _BinaryOper>
|
||||
_OIter
|
||||
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
random_access_iterator_tag, random_access_iterator_tag);
|
||||
} // end namespace
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -44,73 +44,73 @@ namespace __gnu_parallel
|
|||
/** @brief Embarrassingly parallel algorithm for random access
|
||||
* iterators, using an OpenMP for loop.
|
||||
*
|
||||
* @param begin Begin iterator of element sequence.
|
||||
* @param end End iterator of element sequence.
|
||||
* @param o User-supplied functor (comparator, predicate, adding
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __o User-supplied functor (comparator, predicate, adding
|
||||
* functor, etc.).
|
||||
* @param f Functor to "process" an element with op (depends on
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param base Base value for reduction.
|
||||
* @param output Pointer to position where final result is written to
|
||||
* @param bound Maximum number of elements processed (e. g. for
|
||||
* @param __r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename RandomAccessIterator,
|
||||
typename Op,
|
||||
typename Fu,
|
||||
typename Red,
|
||||
typename Result>
|
||||
Op
|
||||
for_each_template_random_access_omp_loop(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Op o, Fu& f, Red r, Result base,
|
||||
Result& output,
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
for_each_template_random_access_omp_loop(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Op __o, _Fu& __f, _Red __r, _Result __base,
|
||||
_Result& __output,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::
|
||||
difference_type bound)
|
||||
<_RAIter>::
|
||||
difference_type __bound)
|
||||
{
|
||||
typedef typename
|
||||
std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
difference_type;
|
||||
std::iterator_traits<_RAIter>::difference_type
|
||||
_DifferenceType;
|
||||
|
||||
difference_type length = end - begin;
|
||||
thread_index_t num_threads =
|
||||
__gnu_parallel::min<difference_type>(get_max_threads(), length);
|
||||
_DifferenceType __length = __end - __begin;
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
|
||||
Result *thread_results;
|
||||
_Result *__thread_results;
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
thread_results = new Result[num_threads];
|
||||
__num_threads = omp_get_num_threads();
|
||||
__thread_results = new _Result[__num_threads];
|
||||
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
thread_results[i] = Result();
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
__thread_results[__i] = _Result();
|
||||
}
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
|
||||
for (difference_type pos = 0; pos < length; ++pos)
|
||||
thread_results[iam] =
|
||||
r(thread_results[iam], f(o, begin+pos));
|
||||
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
|
||||
__thread_results[__iam] =
|
||||
__r(__thread_results[__iam], __f(__o, __begin+__pos));
|
||||
} //parallel
|
||||
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
output = r(output, thread_results[i]);
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
__output = __r(__output, __thread_results[__i]);
|
||||
|
||||
delete [] thread_results;
|
||||
delete [] __thread_results;
|
||||
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform).
|
||||
f.finish_iterator = begin + length;
|
||||
__f.finish_iterator = __begin + __length;
|
||||
|
||||
return o;
|
||||
return __o;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -44,72 +44,72 @@ namespace __gnu_parallel
|
|||
/** @brief Embarrassingly parallel algorithm for random access
|
||||
* iterators, using an OpenMP for loop with static scheduling.
|
||||
*
|
||||
* @param begin Begin iterator of element sequence.
|
||||
* @param end End iterator of element sequence.
|
||||
* @param o User-supplied functor (comparator, predicate, adding
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __o User-supplied functor (comparator, predicate, adding
|
||||
* functor, ...).
|
||||
* @param f Functor to "process" an element with op (depends on
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param r Functor to "add" a single result to the already processed
|
||||
* elements (depends on functionality).
|
||||
* @param base Base value for reduction.
|
||||
* @param output Pointer to position where final result is written to
|
||||
* @param bound Maximum number of elements processed (e. g. for
|
||||
* @param __r Functor to "add" a single result to the already processed
|
||||
* elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename RandomAccessIterator,
|
||||
typename Op,
|
||||
typename Fu,
|
||||
typename Red,
|
||||
typename Result>
|
||||
Op
|
||||
for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Op o, Fu& f, Red r,
|
||||
Result base, Result& output,
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
for_each_template_random_access_omp_loop_static(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Op __o, _Fu& __f, _Red __r,
|
||||
_Result __base, _Result& __output,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::
|
||||
difference_type bound)
|
||||
<_RAIter>::
|
||||
difference_type __bound)
|
||||
{
|
||||
typedef typename
|
||||
std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
difference_type;
|
||||
std::iterator_traits<_RAIter>::difference_type
|
||||
_DifferenceType;
|
||||
|
||||
difference_type length = end - begin;
|
||||
thread_index_t num_threads =
|
||||
std::min<difference_type>(get_max_threads(), length);
|
||||
_DifferenceType __length = __end - __begin;
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
|
||||
Result *thread_results;
|
||||
_Result *__thread_results;
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
thread_results = new Result[num_threads];
|
||||
__num_threads = omp_get_num_threads();
|
||||
__thread_results = new _Result[__num_threads];
|
||||
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
thread_results[i] = Result();
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
__thread_results[__i] = _Result();
|
||||
}
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
|
||||
for (difference_type pos = 0; pos < length; ++pos)
|
||||
thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
|
||||
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
|
||||
__thread_results[__iam] = __r(__thread_results[__iam], __f(__o, __begin+__pos));
|
||||
} //parallel
|
||||
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
output = r(output, thread_results[i]);
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
__output = __r(__output, __thread_results[__i]);
|
||||
|
||||
delete [] thread_results;
|
||||
delete [] __thread_results;
|
||||
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform).
|
||||
f.finish_iterator = begin + length;
|
||||
__f.finish_iterator = __begin + __length;
|
||||
|
||||
return o;
|
||||
return __o;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -45,89 +45,89 @@ namespace __gnu_parallel
|
|||
* iterators, using hand-crafted parallelization by equal splitting
|
||||
* the work.
|
||||
*
|
||||
* @param begin Begin iterator of element sequence.
|
||||
* @param end End iterator of element sequence.
|
||||
* @param o User-supplied functor (comparator, predicate, adding
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __o User-supplied functor (comparator, predicate, adding
|
||||
* functor, ...)
|
||||
* @param f Functor to "process" an element with op (depends on
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param base Base value for reduction.
|
||||
* @param output Pointer to position where final result is written to
|
||||
* @param bound Maximum number of elements processed (e. g. for
|
||||
* @param __r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename RandomAccessIterator,
|
||||
typename Op,
|
||||
typename Fu,
|
||||
typename Red,
|
||||
typename Result>
|
||||
Op
|
||||
for_each_template_random_access_ed(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Op o, Fu& f, Red r, Result base,
|
||||
Result& output,
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
for_each_template_random_access_ed(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Op __o, _Fu& __f, _Red __r, _Result __base,
|
||||
_Result& __output,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::
|
||||
difference_type bound)
|
||||
<_RAIter>::
|
||||
difference_type __bound)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
const difference_type length = end - begin;
|
||||
Result *thread_results;
|
||||
bool* constructed;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
const _DifferenceType __length = __end - __begin;
|
||||
_Result *__thread_results;
|
||||
bool* __constructed;
|
||||
|
||||
thread_index_t num_threads =
|
||||
__gnu_parallel::min<difference_type>(get_max_threads(), length);
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
thread_results = static_cast<Result*>(
|
||||
::operator new(num_threads * sizeof(Result)));
|
||||
constructed = new bool[num_threads];
|
||||
__num_threads = omp_get_num_threads();
|
||||
__thread_results = static_cast<_Result*>(
|
||||
::operator new(__num_threads * sizeof(_Result)));
|
||||
__constructed = new bool[__num_threads];
|
||||
}
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// Neutral element.
|
||||
Result* reduct = static_cast<Result*>(::operator new(sizeof(Result)));
|
||||
_Result* __reduct = static_cast<_Result*>(::operator new(sizeof(_Result)));
|
||||
|
||||
difference_type
|
||||
start = equally_split_point(length, num_threads, iam),
|
||||
stop = equally_split_point(length, num_threads, iam + 1);
|
||||
_DifferenceType
|
||||
__start = equally_split_point(__length, __num_threads, __iam),
|
||||
__stop = equally_split_point(__length, __num_threads, __iam + 1);
|
||||
|
||||
if (start < stop)
|
||||
if (__start < __stop)
|
||||
{
|
||||
new(reduct) Result(f(o, begin + start));
|
||||
++start;
|
||||
constructed[iam] = true;
|
||||
new(__reduct) _Result(__f(__o, __begin + __start));
|
||||
++__start;
|
||||
__constructed[__iam] = true;
|
||||
}
|
||||
else
|
||||
constructed[iam] = false;
|
||||
__constructed[__iam] = false;
|
||||
|
||||
for (; start < stop; ++start)
|
||||
*reduct = r(*reduct, f(o, begin + start));
|
||||
for (; __start < __stop; ++__start)
|
||||
*__reduct = __r(*__reduct, __f(__o, __begin + __start));
|
||||
|
||||
thread_results[iam] = *reduct;
|
||||
__thread_results[__iam] = *__reduct;
|
||||
} //parallel
|
||||
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
if (constructed[i])
|
||||
output = r(output, thread_results[i]);
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
if (__constructed[__i])
|
||||
__output = __r(__output, __thread_results[__i]);
|
||||
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform).
|
||||
f.finish_iterator = begin + length;
|
||||
__f.finish_iterator = __begin + __length;
|
||||
|
||||
delete[] thread_results;
|
||||
delete[] constructed;
|
||||
delete[] __thread_results;
|
||||
delete[] __constructed;
|
||||
|
||||
return o;
|
||||
return __o;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/partial_sum.h
|
||||
* @brief Parallel implementation of std::partial_sum(), i. e. prefix
|
||||
* sums.
|
||||
* @brief Parallel implementation of std::partial_sum(), i.e. prefix
|
||||
* sums.
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*/
|
||||
|
||||
|
|
@ -44,175 +44,175 @@ namespace __gnu_parallel
|
|||
// Problem: there is no 0-element given.
|
||||
|
||||
/** @brief Base case prefix sum routine.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param result Begin iterator of output sequence.
|
||||
* @param bin_op Associative binary function.
|
||||
* @param value Start value. Must be passed since the neutral
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __result Begin iterator of output sequence.
|
||||
* @param __bin_op Associative binary function.
|
||||
* @param __value Start value. Must be passed since the neutral
|
||||
* element is unknown in general.
|
||||
* @return End iterator of output sequence. */
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
OutputIterator
|
||||
parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
_OutputIterator
|
||||
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
typename std::iterator_traits
|
||||
<InputIterator>::value_type value)
|
||||
<_IIter>::value_type __value)
|
||||
{
|
||||
if (begin == end)
|
||||
return result;
|
||||
if (__begin == __end)
|
||||
return __result;
|
||||
|
||||
while (begin != end)
|
||||
while (__begin != __end)
|
||||
{
|
||||
value = bin_op(value, *begin);
|
||||
*result = value;
|
||||
++result;
|
||||
++begin;
|
||||
__value = __bin_op(__value, *__begin);
|
||||
*__result = __value;
|
||||
++__result;
|
||||
++__begin;
|
||||
}
|
||||
return result;
|
||||
return __result;
|
||||
}
|
||||
|
||||
/** @brief Parallel partial sum implementation, two-phase approach,
|
||||
no recursion.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param result Begin iterator of output sequence.
|
||||
* @param bin_op Associative binary function.
|
||||
* @param n Length of sequence.
|
||||
* @param num_threads Number of threads to use.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __result Begin iterator of output sequence.
|
||||
* @param __bin_op Associative binary function.
|
||||
* @param __n Length of sequence.
|
||||
* @param __num_threads Number of threads to use.
|
||||
* @return End iterator of output sequence.
|
||||
*/
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
OutputIterator
|
||||
parallel_partial_sum_linear(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op,
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
_OutputIterator
|
||||
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op,
|
||||
typename std::iterator_traits
|
||||
<InputIterator>::difference_type n)
|
||||
<_IIter>::difference_type __n)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
if (begin == end)
|
||||
return result;
|
||||
if (__begin == __end)
|
||||
return __result;
|
||||
|
||||
thread_index_t num_threads =
|
||||
std::min<difference_type>(get_max_threads(), n - 1);
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(), __n - 1);
|
||||
|
||||
if (num_threads < 2)
|
||||
if (__num_threads < 2)
|
||||
{
|
||||
*result = *begin;
|
||||
return parallel_partial_sum_basecase(
|
||||
begin + 1, end, result + 1, bin_op, *begin);
|
||||
*__result = *__begin;
|
||||
return __parallel_partial_sum_basecase(
|
||||
__begin + 1, __end, __result + 1, __bin_op, *__begin);
|
||||
}
|
||||
|
||||
difference_type* borders;
|
||||
value_type* sums;
|
||||
_DifferenceType* __borders;
|
||||
_ValueType* __sums;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
borders = new difference_type[num_threads + 2];
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
|
||||
if (__s.partial_sum_dilation == 1.0f)
|
||||
equally_split(n, num_threads + 1, borders);
|
||||
equally_split(__n, __num_threads + 1, __borders);
|
||||
else
|
||||
{
|
||||
difference_type chunk_length =
|
||||
((double)n
|
||||
/ ((double)num_threads + __s.partial_sum_dilation)),
|
||||
borderstart = n - num_threads * chunk_length;
|
||||
borders[0] = 0;
|
||||
for (int i = 1; i < (num_threads + 1); ++i)
|
||||
_DifferenceType __chunk_length =
|
||||
((double)__n
|
||||
/ ((double)__num_threads + __s.partial_sum_dilation)),
|
||||
__borderstart = __n - __num_threads * __chunk_length;
|
||||
__borders[0] = 0;
|
||||
for (int __i = 1; __i < (__num_threads + 1); ++__i)
|
||||
{
|
||||
borders[i] = borderstart;
|
||||
borderstart += chunk_length;
|
||||
__borders[__i] = __borderstart;
|
||||
__borderstart += __chunk_length;
|
||||
}
|
||||
borders[num_threads + 1] = n;
|
||||
__borders[__num_threads + 1] = __n;
|
||||
}
|
||||
|
||||
sums = static_cast<value_type*>(::operator new(sizeof(value_type)
|
||||
* num_threads));
|
||||
OutputIterator target_end;
|
||||
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
|
||||
* __num_threads));
|
||||
_OutputIterator __target_end;
|
||||
} //single
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
if (iam == 0)
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
if (__iam == 0)
|
||||
{
|
||||
*result = *begin;
|
||||
parallel_partial_sum_basecase(begin + 1, begin + borders[1],
|
||||
result + 1, bin_op, *begin);
|
||||
::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
|
||||
*__result = *__begin;
|
||||
__parallel_partial_sum_basecase(__begin + 1, __begin + __borders[1],
|
||||
__result + 1, __bin_op, *__begin);
|
||||
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
::new(&(sums[iam]))
|
||||
value_type(std::accumulate(begin + borders[iam] + 1,
|
||||
begin + borders[iam + 1],
|
||||
*(begin + borders[iam]),
|
||||
bin_op,
|
||||
::new(&(__sums[__iam]))
|
||||
_ValueType(std::accumulate(__begin + __borders[__iam] + 1,
|
||||
__begin + __borders[__iam + 1],
|
||||
*(__begin + __borders[__iam]),
|
||||
__bin_op,
|
||||
__gnu_parallel::sequential_tag()));
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
parallel_partial_sum_basecase(
|
||||
sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
|
||||
__parallel_partial_sum_basecase(
|
||||
__sums + 1, __sums + __num_threads, __sums + 1, __bin_op, __sums[0]);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Still same team.
|
||||
parallel_partial_sum_basecase(begin + borders[iam + 1],
|
||||
begin + borders[iam + 2],
|
||||
result + borders[iam + 1], bin_op,
|
||||
sums[iam]);
|
||||
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
|
||||
__begin + __borders[__iam + 2],
|
||||
__result + __borders[__iam + 1], __bin_op,
|
||||
__sums[__iam]);
|
||||
} //parallel
|
||||
|
||||
::operator delete(sums);
|
||||
delete[] borders;
|
||||
::operator delete(__sums);
|
||||
delete[] __borders;
|
||||
|
||||
return result + n;
|
||||
return __result + __n;
|
||||
}
|
||||
|
||||
/** @brief Parallel partial sum front-end.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param result Begin iterator of output sequence.
|
||||
* @param bin_op Associative binary function.
|
||||
/** @brief Parallel partial sum front-end.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __result Begin iterator of output sequence.
|
||||
* @param __bin_op Associative binary function.
|
||||
* @return End iterator of output sequence. */
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename BinaryOperation>
|
||||
OutputIterator
|
||||
parallel_partial_sum(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation bin_op)
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _BinaryOperation>
|
||||
_OutputIterator
|
||||
__parallel_partial_sum(_IIter __begin, _IIter __end,
|
||||
_OutputIterator __result, _BinaryOperation __bin_op)
|
||||
{
|
||||
_GLIBCXX_CALL(begin - end)
|
||||
_GLIBCXX_CALL(__begin - __end)
|
||||
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
switch (_Settings::get().partial_sum_algorithm)
|
||||
{
|
||||
case LINEAR:
|
||||
// Need an initial offset.
|
||||
return parallel_partial_sum_linear(begin, end, result, bin_op, n);
|
||||
// Need an initial offset.
|
||||
return __parallel_partial_sum_linear(__begin, __end, __result, __bin_op, __n);
|
||||
default:
|
||||
// Partial_sum algorithm not implemented.
|
||||
_GLIBCXX_PARALLEL_ASSERT(0);
|
||||
return result + n;
|
||||
return __result + __n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,231 +45,231 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Parallel implementation of std::partition.
|
||||
* @param begin Begin iterator of input sequence to split.
|
||||
* @param end End iterator of input sequence to split.
|
||||
* @param pred Partition predicate, possibly including some kind of pivot.
|
||||
* @param num_threads Maximum number of threads to use for this task.
|
||||
* @param __begin Begin iterator of input sequence to split.
|
||||
* @param __end End iterator of input sequence to split.
|
||||
* @param __pred Partition predicate, possibly including some kind of pivot.
|
||||
* @param __num_threads Maximum number of threads to use for this task.
|
||||
* @return Number of elements not fulfilling the predicate. */
|
||||
template<typename RandomAccessIterator, typename Predicate>
|
||||
typename std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Predicate pred, thread_index_t num_threads)
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__parallel_partition(_RAIter __begin, _RAIter __end,
|
||||
_Predicate __pred, _ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_GLIBCXX_CALL(n)
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
// Shared.
|
||||
_GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
|
||||
_GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
|
||||
_GLIBCXX_VOLATILE difference_type leftnew, rightnew;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew;
|
||||
|
||||
bool* reserved_left = NULL, * reserved_right = NULL;
|
||||
bool* __reserved_left = NULL, * __reserved_right = NULL;
|
||||
|
||||
difference_type chunk_size;
|
||||
_DifferenceType __chunk_size;
|
||||
|
||||
omp_lock_t result_lock;
|
||||
omp_init_lock(&result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
//at least two chunks per thread
|
||||
if(right - left + 1 >= 2 * num_threads * chunk_size)
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
// At least two chunks per thread.
|
||||
if(__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
reserved_left = new bool[num_threads];
|
||||
reserved_right = new bool[num_threads];
|
||||
__num_threads = omp_get_num_threads();
|
||||
__reserved_left = new bool[__num_threads];
|
||||
__reserved_right = new bool[__num_threads];
|
||||
|
||||
if (__s.partition_chunk_share > 0.0)
|
||||
chunk_size = std::max<difference_type>(__s.partition_chunk_size,
|
||||
(double)n * __s.partition_chunk_share
|
||||
/ (double)num_threads);
|
||||
__chunk_size = std::max<_DifferenceType>(__s.partition_chunk_size,
|
||||
(double)__n * __s.partition_chunk_share
|
||||
/ (double)__num_threads);
|
||||
else
|
||||
chunk_size = __s.partition_chunk_size;
|
||||
__chunk_size = __s.partition_chunk_size;
|
||||
}
|
||||
|
||||
while (right - left + 1 >= 2 * num_threads * chunk_size)
|
||||
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
difference_type num_chunks = (right - left + 1) / chunk_size;
|
||||
_DifferenceType __num_chunks = (__right - __left + 1) / __chunk_size;
|
||||
|
||||
for (int r = 0; r < num_threads; ++r)
|
||||
for (int __r = 0; __r < __num_threads; ++__r)
|
||||
{
|
||||
reserved_left[r] = false;
|
||||
reserved_right[r] = false;
|
||||
__reserved_left[__r] = false;
|
||||
__reserved_right[__r] = false;
|
||||
}
|
||||
leftover_left = 0;
|
||||
leftover_right = 0;
|
||||
__leftover_left = 0;
|
||||
__leftover_right = 0;
|
||||
} //implicit barrier
|
||||
|
||||
// Private.
|
||||
difference_type thread_left, thread_left_border,
|
||||
thread_right, thread_right_border;
|
||||
thread_left = left + 1;
|
||||
_DifferenceType __thread_left, __thread_left_border,
|
||||
thread_right, __thread_right_border;
|
||||
__thread_left = __left + 1;
|
||||
|
||||
// Just to satisfy the condition below.
|
||||
thread_left_border = thread_left - 1;
|
||||
thread_right = n - 1;
|
||||
thread_right_border = thread_right + 1;
|
||||
__thread_left_border = __thread_left - 1;
|
||||
thread_right = __n - 1;
|
||||
__thread_right_border = thread_right + 1;
|
||||
|
||||
bool iam_finished = false;
|
||||
while (!iam_finished)
|
||||
bool __iam_finished = false;
|
||||
while (!__iam_finished)
|
||||
{
|
||||
if (thread_left > thread_left_border)
|
||||
if (__thread_left > __thread_left_border)
|
||||
{
|
||||
omp_set_lock(&result_lock);
|
||||
if (left + (chunk_size - 1) > right)
|
||||
iam_finished = true;
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left + (__chunk_size - 1) > __right)
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
thread_left = left;
|
||||
thread_left_border = left + (chunk_size - 1);
|
||||
left += chunk_size;
|
||||
__thread_left = __left;
|
||||
__thread_left_border = __left + (__chunk_size - 1);
|
||||
__left += __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
if (thread_right < thread_right_border)
|
||||
if (thread_right < __thread_right_border)
|
||||
{
|
||||
omp_set_lock(&result_lock);
|
||||
if (left > right - (chunk_size - 1))
|
||||
iam_finished = true;
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left > __right - (__chunk_size - 1))
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
thread_right = right;
|
||||
thread_right_border = right - (chunk_size - 1);
|
||||
right -= chunk_size;
|
||||
thread_right = __right;
|
||||
__thread_right_border = __right - (__chunk_size - 1);
|
||||
__right -= __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
if (iam_finished)
|
||||
if (__iam_finished)
|
||||
break;
|
||||
|
||||
// Swap as usual.
|
||||
while (thread_left < thread_right)
|
||||
while (__thread_left < thread_right)
|
||||
{
|
||||
while (pred(begin[thread_left])
|
||||
&& thread_left <= thread_left_border)
|
||||
++thread_left;
|
||||
while (!pred(begin[thread_right])
|
||||
&& thread_right >= thread_right_border)
|
||||
while (__pred(__begin[__thread_left])
|
||||
&& __thread_left <= __thread_left_border)
|
||||
++__thread_left;
|
||||
while (!__pred(__begin[thread_right])
|
||||
&& thread_right >= __thread_right_border)
|
||||
--thread_right;
|
||||
|
||||
if (thread_left > thread_left_border
|
||||
|| thread_right < thread_right_border)
|
||||
// Fetch new chunk(s).
|
||||
if (__thread_left > __thread_left_border
|
||||
|| thread_right < __thread_right_border)
|
||||
// Fetch new chunk(s).
|
||||
break;
|
||||
|
||||
std::swap(begin[thread_left], begin[thread_right]);
|
||||
++thread_left;
|
||||
std::swap(__begin[__thread_left], __begin[thread_right]);
|
||||
++__thread_left;
|
||||
--thread_right;
|
||||
}
|
||||
}
|
||||
|
||||
// Now swap the leftover chunks to the right places.
|
||||
if (thread_left <= thread_left_border)
|
||||
if (__thread_left <= __thread_left_border)
|
||||
# pragma omp atomic
|
||||
++leftover_left;
|
||||
if (thread_right >= thread_right_border)
|
||||
++__leftover_left;
|
||||
if (thread_right >= __thread_right_border)
|
||||
# pragma omp atomic
|
||||
++leftover_right;
|
||||
++__leftover_right;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
leftnew = left - leftover_left * chunk_size;
|
||||
rightnew = right + leftover_right * chunk_size;
|
||||
__leftnew = __left - __leftover_left * __chunk_size;
|
||||
__rightnew = __right + __leftover_right * __chunk_size;
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// <=> thread_left_border + (chunk_size - 1) >= leftnew
|
||||
if (thread_left <= thread_left_border
|
||||
&& thread_left_border >= leftnew)
|
||||
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border >= __leftnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
reserved_left[(left - (thread_left_border + 1)) / chunk_size]
|
||||
__reserved_left[(__left - (__thread_left_border + 1)) / __chunk_size]
|
||||
= true;
|
||||
}
|
||||
|
||||
// <=> thread_right_border - (chunk_size - 1) <= rightnew
|
||||
if (thread_right >= thread_right_border
|
||||
&& thread_right_border <= rightnew)
|
||||
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
|
||||
if (thread_right >= __thread_right_border
|
||||
&& __thread_right_border <= __rightnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
reserved_right[((thread_right_border - 1) - right)
|
||||
/ chunk_size] = true;
|
||||
__reserved_right[((__thread_right_border - 1) - __right)
|
||||
/ __chunk_size] = true;
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
if (thread_left <= thread_left_border
|
||||
&& thread_left_border < leftnew)
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border < __leftnew)
|
||||
{
|
||||
// Find spot and swap.
|
||||
difference_type swapstart = -1;
|
||||
omp_set_lock(&result_lock);
|
||||
for (int r = 0; r < leftover_left; ++r)
|
||||
if (!reserved_left[r])
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
if (!__reserved_left[__r])
|
||||
{
|
||||
reserved_left[r] = true;
|
||||
swapstart = left - (r + 1) * chunk_size;
|
||||
__reserved_left[__r] = true;
|
||||
__swapstart = __left - (__r + 1) * __chunk_size;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
#endif
|
||||
|
||||
std::swap_ranges(begin + thread_left_border
|
||||
- (chunk_size - 1),
|
||||
begin + thread_left_border + 1,
|
||||
begin + swapstart);
|
||||
std::swap_ranges(__begin + __thread_left_border
|
||||
- (__chunk_size - 1),
|
||||
__begin + __thread_left_border + 1,
|
||||
__begin + __swapstart);
|
||||
}
|
||||
|
||||
if (thread_right >= thread_right_border
|
||||
&& thread_right_border > rightnew)
|
||||
if (thread_right >= __thread_right_border
|
||||
&& __thread_right_border > __rightnew)
|
||||
{
|
||||
// Find spot and swap
|
||||
difference_type swapstart = -1;
|
||||
omp_set_lock(&result_lock);
|
||||
for (int r = 0; r < leftover_right; ++r)
|
||||
if (!reserved_right[r])
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
if (!__reserved_right[__r])
|
||||
{
|
||||
reserved_right[r] = true;
|
||||
swapstart = right + r * chunk_size + 1;
|
||||
__reserved_right[__r] = true;
|
||||
__swapstart = __right + __r * __chunk_size + 1;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
#endif
|
||||
|
||||
std::swap_ranges(begin + thread_right_border,
|
||||
begin + thread_right_border + chunk_size,
|
||||
begin + swapstart);
|
||||
std::swap_ranges(__begin + __thread_right_border,
|
||||
__begin + __thread_right_border + __chunk_size,
|
||||
__begin + __swapstart);
|
||||
}
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
for (int r = 0; r < leftover_left; ++r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
|
||||
for (int r = 0; r < leftover_right; ++r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]);
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
|
@ -277,149 +277,149 @@ template<typename RandomAccessIterator, typename Predicate>
|
|||
|
||||
# pragma omp barrier
|
||||
|
||||
left = leftnew;
|
||||
right = rightnew;
|
||||
__left = __leftnew;
|
||||
__right = __rightnew;
|
||||
}
|
||||
# pragma omp flush(left, right)
|
||||
# pragma omp flush(__left, __right)
|
||||
} // end "recursion" //parallel
|
||||
|
||||
difference_type final_left = left, final_right = right;
|
||||
_DifferenceType __final_left = __left, __final_right = __right;
|
||||
|
||||
while (final_left < final_right)
|
||||
while (__final_left < __final_right)
|
||||
{
|
||||
// Go right until key is geq than pivot.
|
||||
while (pred(begin[final_left]) && final_left < final_right)
|
||||
++final_left;
|
||||
while (__pred(__begin[__final_left]) && __final_left < __final_right)
|
||||
++__final_left;
|
||||
|
||||
// Go left until key is less than pivot.
|
||||
while (!pred(begin[final_right]) && final_left < final_right)
|
||||
--final_right;
|
||||
while (!__pred(__begin[__final_right]) && __final_left < __final_right)
|
||||
--__final_right;
|
||||
|
||||
if (final_left == final_right)
|
||||
if (__final_left == __final_right)
|
||||
break;
|
||||
std::swap(begin[final_left], begin[final_right]);
|
||||
++final_left;
|
||||
--final_right;
|
||||
std::swap(__begin[__final_left], __begin[__final_right]);
|
||||
++__final_left;
|
||||
--__final_right;
|
||||
}
|
||||
|
||||
// All elements on the left side are < piv, all elements on the
|
||||
// right are >= piv
|
||||
delete[] reserved_left;
|
||||
delete[] reserved_right;
|
||||
delete[] __reserved_left;
|
||||
delete[] __reserved_right;
|
||||
|
||||
omp_destroy_lock(&result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
// Element "between" final_left and final_right might not have
|
||||
// Element "between" __final_left and __final_right might not have
|
||||
// been regarded yet
|
||||
if (final_left < n && !pred(begin[final_left]))
|
||||
if (__final_left < __n && !__pred(__begin[__final_left]))
|
||||
// Really swapped.
|
||||
return final_left;
|
||||
return __final_left;
|
||||
else
|
||||
return final_left + 1;
|
||||
return __final_left + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parallel implementation of std::nth_element().
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param nth Iterator of element that must be in position afterwards.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __nth Iterator of element that must be in position afterwards.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
|
||||
RandomAccessIterator end, Comparator comp)
|
||||
parallel_nth_element(_RAIter __begin, _RAIter __nth,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
RandomAccessIterator split;
|
||||
random_number rng;
|
||||
_RAIter __split;
|
||||
_RandomNumber __rng;
|
||||
|
||||
difference_type minimum_length =
|
||||
std::max<difference_type>(2, _Settings::get().partition_minimal_n);
|
||||
_DifferenceType minimum_length =
|
||||
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
|
||||
|
||||
// Break if input range too small.
|
||||
while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
|
||||
while (static_cast<_SequenceIndex>(__end - __begin) >= minimum_length)
|
||||
{
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
RandomAccessIterator pivot_pos = begin + rng(n);
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
|
||||
// Swap pivot_pos value to end.
|
||||
if (pivot_pos != (end - 1))
|
||||
std::swap(*pivot_pos, *(end - 1));
|
||||
pivot_pos = end - 1;
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
// XXX Comparator must have first_value_type, second_value_type,
|
||||
// result_type
|
||||
// Comparator == __gnu_parallel::lexicographic<S, int,
|
||||
// __gnu_parallel::less<S, S> >
|
||||
// pivot_pos == std::pair<S, int>*
|
||||
// XXX binder2nd only for RandomAccessIterators??
|
||||
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
|
||||
pred(comp, *pivot_pos);
|
||||
// XXX _Compare must have first_value_type, second_value_type,
|
||||
// result_type
|
||||
// _Compare == __gnu_parallel::_Lexicographic<S, int,
|
||||
// __gnu_parallel::_Less<S, S> >
|
||||
// __pivot_pos == std::pair<S, int>*
|
||||
// XXX binder2nd only for _RAIters??
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
RandomAccessIterator split_pos1, split_pos2;
|
||||
split_pos1 = begin + parallel_partition(begin, end - 1, pred,
|
||||
get_max_threads());
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1, __pred,
|
||||
__get_max_threads());
|
||||
|
||||
// Left side: < pivot_pos; right side: >= pivot_pos
|
||||
// Left side: < __pivot_pos; right side: >= __pivot_pos
|
||||
|
||||
// Swap pivot back to middle.
|
||||
if (split_pos1 != pivot_pos)
|
||||
std::swap(*split_pos1, *pivot_pos);
|
||||
pivot_pos = split_pos1;
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, split_pos1 == 0
|
||||
if ((split_pos1 + 1 - begin) < (n >> 7)
|
||||
|| (end - split_pos1) < (n >> 7))
|
||||
// In case all elements are equal, __split_pos1 == 0
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::unary_negate<__gnu_parallel::
|
||||
binder1st<Comparator, value_type, value_type, bool>, value_type>
|
||||
pred(__gnu_parallel::binder1st<Comparator, value_type,
|
||||
value_type, bool>(comp, *pivot_pos));
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::
|
||||
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
|
||||
_ValueType, bool>(__comp, *__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
|
||||
end, pred);
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
split_pos2 = split_pos1 + 1;
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Compare iterators.
|
||||
if (split_pos2 <= nth)
|
||||
begin = split_pos2;
|
||||
else if (nth < split_pos1)
|
||||
end = split_pos1;
|
||||
if (__split_pos2 <= __nth)
|
||||
__begin = __split_pos2;
|
||||
else if (__nth < __split_pos1)
|
||||
__end = __split_pos1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// Only at most _Settings::partition_minimal_n elements left.
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
// Only at most _Settings::partition_minimal_n elements left.
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
}
|
||||
|
||||
/** @brief Parallel implementation of std::partial_sort().
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param middle Sort until this position.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator. */
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __middle Sort until this position.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator. */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
parallel_partial_sort(RandomAccessIterator begin,
|
||||
RandomAccessIterator middle,
|
||||
RandomAccessIterator end, Comparator comp)
|
||||
parallel_partial_sort(_RAIter __begin,
|
||||
_RAIter __middle,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
parallel_nth_element(begin, middle, end, comp);
|
||||
std::sort(begin, middle, comp);
|
||||
parallel_nth_element(__begin, __middle, __end, __comp);
|
||||
std::sort(__begin, __middle, __comp);
|
||||
}
|
||||
|
||||
} //namespace __gnu_parallel
|
||||
|
|
|
|||
|
|
@ -45,99 +45,99 @@ namespace __gnu_parallel
|
|||
* atomic access. push_front() and pop_front() must not be called
|
||||
* concurrently to each other, while pop_back() can be called
|
||||
* concurrently at all times.
|
||||
* @c empty(), @c size(), and @c top() are intentionally not provided.
|
||||
* @c empty(), @c size(), and @c top() are intentionally not provided.
|
||||
* Calling them would not make sense in a concurrent setting.
|
||||
* @param T Contained element type. */
|
||||
template<typename T>
|
||||
class RestrictedBoundedConcurrentQueue
|
||||
* @param _Tp Contained element type. */
|
||||
template<typename _Tp>
|
||||
class _RestrictedBoundedConcurrentQueue
|
||||
{
|
||||
private:
|
||||
/** @brief Array of elements, seen as cyclic buffer. */
|
||||
T* base;
|
||||
_Tp* _M_base;
|
||||
|
||||
/** @brief Maximal number of elements contained at the same time. */
|
||||
sequence_index_t max_size;
|
||||
_SequenceIndex _M_max_size;
|
||||
|
||||
/** @brief Cyclic begin and end pointers contained in one
|
||||
/** @brief Cyclic begin and end pointers contained in one
|
||||
atomically changeable value. */
|
||||
_GLIBCXX_VOLATILE lcas_t borders;
|
||||
_GLIBCXX_VOLATILE _CASable _M_borders;
|
||||
|
||||
public:
|
||||
/** @brief Constructor. Not to be called concurrently, of course.
|
||||
* @param max_size Maximal number of elements to be contained. */
|
||||
RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
|
||||
* @param _M_max_size Maximal number of elements to be contained. */
|
||||
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
|
||||
{
|
||||
this->max_size = max_size;
|
||||
base = new T[max_size];
|
||||
borders = encode2(0, 0);
|
||||
this->_M_max_size = _M_max_size;
|
||||
_M_base = new _Tp[_M_max_size];
|
||||
_M_borders = __encode2(0, 0);
|
||||
#pragma omp flush
|
||||
}
|
||||
|
||||
/** @brief Destructor. Not to be called concurrently, of course. */
|
||||
~RestrictedBoundedConcurrentQueue()
|
||||
{ delete[] base; }
|
||||
~_RestrictedBoundedConcurrentQueue()
|
||||
{ delete[] _M_base; }
|
||||
|
||||
/** @brief Pushes one element into the queue at the front end.
|
||||
/** @brief Pushes one element into the queue at the front end.
|
||||
* Must not be called concurrently with pop_front(). */
|
||||
void
|
||||
push_front(const T& t)
|
||||
push_front(const _Tp& __t)
|
||||
{
|
||||
lcas_t former_borders = borders;
|
||||
int former_front, former_back;
|
||||
decode2(former_borders, former_front, former_back);
|
||||
*(base + former_front % max_size) = t;
|
||||
_CASable __former_borders = _M_borders;
|
||||
int __former_front, __former_back;
|
||||
decode2(__former_borders, __former_front, __former_back);
|
||||
*(_M_base + __former_front % _M_max_size) = __t;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// Otherwise: front - back > max_size eventually.
|
||||
_GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back)
|
||||
<= max_size);
|
||||
// Otherwise: front - back > _M_max_size eventually.
|
||||
_GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
|
||||
<= _M_max_size);
|
||||
#endif
|
||||
fetch_and_add(&borders, encode2(1, 0));
|
||||
__fetch_and_add(&_M_borders, __encode2(1, 0));
|
||||
}
|
||||
|
||||
/** @brief Pops one element from the queue at the front end.
|
||||
/** @brief Pops one element from the queue at the front end.
|
||||
* Must not be called concurrently with push_front(). */
|
||||
bool
|
||||
pop_front(T& t)
|
||||
pop_front(_Tp& __t)
|
||||
{
|
||||
int former_front, former_back;
|
||||
int __former_front, __former_back;
|
||||
#pragma omp flush
|
||||
decode2(borders, former_front, former_back);
|
||||
while (former_front > former_back)
|
||||
decode2(_M_borders, __former_front, __former_back);
|
||||
while (__former_front > __former_back)
|
||||
{
|
||||
// Chance.
|
||||
lcas_t former_borders = encode2(former_front, former_back);
|
||||
lcas_t new_borders = encode2(former_front - 1, former_back);
|
||||
if (compare_and_swap(&borders, former_borders, new_borders))
|
||||
_CASable __former_borders = __encode2(__former_front, __former_back);
|
||||
_CASable __new_borders = __encode2(__former_front - 1, __former_back);
|
||||
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
|
||||
{
|
||||
t = *(base + (former_front - 1) % max_size);
|
||||
__t = *(_M_base + (__former_front - 1) % _M_max_size);
|
||||
return true;
|
||||
}
|
||||
#pragma omp flush
|
||||
decode2(borders, former_front, former_back);
|
||||
decode2(_M_borders, __former_front, __former_back);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @brief Pops one element from the queue at the front end.
|
||||
/** @brief Pops one element from the queue at the back end.
|
||||
* Can be called concurrently with push_front() and pop_front(). */
|
||||
bool
|
||||
pop_back(T& t) //queue behavior
|
||||
pop_back(_Tp& __t) //queue behavior
|
||||
{
|
||||
int former_front, former_back;
|
||||
int __former_front, __former_back;
|
||||
#pragma omp flush
|
||||
decode2(borders, former_front, former_back);
|
||||
while (former_front > former_back)
|
||||
decode2(_M_borders, __former_front, __former_back);
|
||||
while (__former_front > __former_back)
|
||||
{
|
||||
// Chance.
|
||||
lcas_t former_borders = encode2(former_front, former_back);
|
||||
lcas_t new_borders = encode2(former_front, former_back + 1);
|
||||
if (compare_and_swap(&borders, former_borders, new_borders))
|
||||
_CASable __former_borders = __encode2(__former_front, __former_back);
|
||||
_CASable __new_borders = __encode2(__former_front, __former_back + 1);
|
||||
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
|
||||
{
|
||||
t = *(base + former_back % max_size);
|
||||
__t = *(_M_base + __former_back % _M_max_size);
|
||||
return true;
|
||||
}
|
||||
#pragma omp flush
|
||||
decode2(borders, former_front, former_back);
|
||||
decode2(_M_borders, __former_front, __former_back);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,140 +38,140 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Unbalanced quicksort divide step.
|
||||
* @param begin Begin iterator of subsequence.
|
||||
* @param end End iterator of subsequence.
|
||||
* @param comp Comparator.
|
||||
* @param pivot_rank Desired rank of the pivot.
|
||||
* @param num_samples Choose pivot from that many samples.
|
||||
* @param num_threads Number of threads that are allowed to work on
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __pivot_rank Desired rank of the pivot.
|
||||
* @param __num_samples Choose pivot from that many samples.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
typename std::iterator_traits<RandomAccessIterator>::difference_type
|
||||
parallel_sort_qs_divide(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Comparator comp, typename std::iterator_traits
|
||||
<RandomAccessIterator>::difference_type pivot_rank,
|
||||
template<typename _RAIter, typename _Compare>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__parallel_sort_qs_divide(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp, typename std::iterator_traits
|
||||
<_RAIter>::difference_type __pivot_rank,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::difference_type
|
||||
num_samples, thread_index_t num_threads)
|
||||
<_RAIter>::difference_type
|
||||
__num_samples, _ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
num_samples = std::min(num_samples, n);
|
||||
_DifferenceType __n = __end - __begin;
|
||||
__num_samples = std::min(__num_samples, __n);
|
||||
|
||||
// Allocate uninitialized, to avoid default constructor.
|
||||
value_type* samples =
|
||||
static_cast<value_type*>(::operator new(num_samples
|
||||
* sizeof(value_type)));
|
||||
_ValueType* __samples =
|
||||
static_cast<_ValueType*>(::operator new(__num_samples
|
||||
* sizeof(_ValueType)));
|
||||
|
||||
for (difference_type s = 0; s < num_samples; ++s)
|
||||
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
|
||||
{
|
||||
const unsigned long long index = static_cast<unsigned long long>(s)
|
||||
* n / num_samples;
|
||||
::new(&(samples[s])) value_type(begin[index]);
|
||||
const unsigned long long __index = static_cast<unsigned long long>(__s)
|
||||
* __n / __num_samples;
|
||||
::new(&(__samples[__s])) _ValueType(__begin[__index]);
|
||||
}
|
||||
|
||||
__gnu_sequential::sort(samples, samples + num_samples, comp);
|
||||
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
|
||||
|
||||
value_type& pivot = samples[pivot_rank * num_samples / n];
|
||||
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n];
|
||||
|
||||
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
|
||||
pred(comp, pivot);
|
||||
difference_type split =
|
||||
parallel_partition(begin, end, pred, num_threads);
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, pivot);
|
||||
_DifferenceType __split =
|
||||
__parallel_partition(__begin, __end, __pred, __num_threads);
|
||||
|
||||
::operator delete(samples);
|
||||
::operator delete(__samples);
|
||||
|
||||
return split;
|
||||
return __split;
|
||||
}
|
||||
|
||||
/** @brief Unbalanced quicksort conquer step.
|
||||
* @param begin Begin iterator of subsequence.
|
||||
* @param end End iterator of subsequence.
|
||||
* @param comp Comparator.
|
||||
* @param num_threads Number of threads that are allowed to work on
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
parallel_sort_qs_conquer(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
thread_index_t num_threads)
|
||||
__parallel_sort_qs_conquer(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
if (num_threads <= 1)
|
||||
if (__num_threads <= 1)
|
||||
{
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
return;
|
||||
}
|
||||
|
||||
difference_type n = end - begin, pivot_rank;
|
||||
_DifferenceType __n = __end - __begin, __pivot_rank;
|
||||
|
||||
if (n <= 1)
|
||||
if (__n <= 1)
|
||||
return;
|
||||
|
||||
thread_index_t num_threads_left;
|
||||
_ThreadIndex __num_threads_left;
|
||||
|
||||
if ((num_threads % 2) == 1)
|
||||
num_threads_left = num_threads / 2 + 1;
|
||||
if ((__num_threads % 2) == 1)
|
||||
__num_threads_left = __num_threads / 2 + 1;
|
||||
else
|
||||
num_threads_left = num_threads / 2;
|
||||
__num_threads_left = __num_threads / 2;
|
||||
|
||||
pivot_rank = n * num_threads_left / num_threads;
|
||||
__pivot_rank = __n * __num_threads_left / __num_threads;
|
||||
|
||||
difference_type split =
|
||||
parallel_sort_qs_divide(begin, end, comp, pivot_rank,
|
||||
_DifferenceType __split =
|
||||
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
|
||||
_Settings::get().sort_qs_num_samples_preset,
|
||||
num_threads);
|
||||
__num_threads);
|
||||
|
||||
#pragma omp parallel sections num_threads(2)
|
||||
{
|
||||
#pragma omp section
|
||||
parallel_sort_qs_conquer(begin, begin + split,
|
||||
comp, num_threads_left);
|
||||
__parallel_sort_qs_conquer(__begin, __begin + __split,
|
||||
__comp, __num_threads_left);
|
||||
#pragma omp section
|
||||
parallel_sort_qs_conquer(begin + split, end,
|
||||
comp, num_threads - num_threads_left);
|
||||
__parallel_sort_qs_conquer(__begin + __split, __end,
|
||||
__comp, __num_threads - __num_threads_left);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** @brief Unbalanced quicksort main call.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator input sequence, ignored.
|
||||
* @param comp Comparator.
|
||||
* @param num_threads Number of threads that are allowed to work on
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator input sequence, ignored.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename Comparator>
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
parallel_sort_qs(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
thread_index_t num_threads)
|
||||
__parallel_sort_qs(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(n)
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
// At least one element per processor.
|
||||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
|
||||
__parallel_sort_qs_conquer(__begin, __begin + __n, __comp, __num_threads);
|
||||
}
|
||||
|
||||
} //namespace __gnu_parallel
|
||||
|
|
|
|||
|
|
@ -38,84 +38,84 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Random number generator, based on the Mersenne twister. */
|
||||
class random_number
|
||||
class _RandomNumber
|
||||
{
|
||||
private:
|
||||
std::tr1::mt19937 mt;
|
||||
uint64 supremum;
|
||||
uint64 RAND_SUP;
|
||||
double supremum_reciprocal;
|
||||
double RAND_SUP_REC;
|
||||
std::tr1::mt19937 _M_mt;
|
||||
uint64 _M_supremum;
|
||||
uint64 _RAND_SUP;
|
||||
double _M_supremum_reciprocal;
|
||||
double _RAND_SUP_REC;
|
||||
|
||||
// Assumed to be twice as long as the usual random number.
|
||||
uint64 cache;
|
||||
uint64 __cache;
|
||||
|
||||
// Bit results.
|
||||
int bits_left;
|
||||
int __bits_left;
|
||||
|
||||
static uint32
|
||||
scale_down(uint64 x,
|
||||
__scale_down(uint64 __x,
|
||||
#if _GLIBCXX_SCALE_DOWN_FPU
|
||||
uint64 /*supremum*/, double supremum_reciprocal)
|
||||
uint64 /*_M_supremum*/, double _M_supremum_reciprocal)
|
||||
#else
|
||||
uint64 supremum, double /*supremum_reciprocal*/)
|
||||
uint64 _M_supremum, double /*_M_supremum_reciprocal*/)
|
||||
#endif
|
||||
{
|
||||
#if _GLIBCXX_SCALE_DOWN_FPU
|
||||
return uint32(x * supremum_reciprocal);
|
||||
return uint32(__x * _M_supremum_reciprocal);
|
||||
#else
|
||||
return static_cast<uint32>(x % supremum);
|
||||
return static_cast<uint32>(__x % _M_supremum);
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
/** @brief Default constructor. Seed with 0. */
|
||||
random_number()
|
||||
: mt(0), supremum(0x100000000ULL),
|
||||
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
|
||||
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
|
||||
RAND_SUP_REC(1.0 / double(RAND_SUP)),
|
||||
cache(0), bits_left(0) { }
|
||||
_RandomNumber()
|
||||
: _M_mt(0), _M_supremum(0x100000000ULL),
|
||||
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
|
||||
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
|
||||
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
|
||||
__cache(0), __bits_left(0) { }
|
||||
|
||||
/** @brief Constructor.
|
||||
* @param seed Random seed.
|
||||
* @param supremum Generate integer random numbers in the
|
||||
* interval @c [0,supremum). */
|
||||
random_number(uint32 seed, uint64 supremum = 0x100000000ULL)
|
||||
: mt(seed), supremum(supremum),
|
||||
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
|
||||
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
|
||||
RAND_SUP_REC(1.0 / double(RAND_SUP)),
|
||||
cache(0), bits_left(0) { }
|
||||
* @param __seed Random seed.
|
||||
* @param _M_supremum Generate integer random numbers in the
|
||||
* interval @c [0,_M_supremum). */
|
||||
_RandomNumber(uint32 __seed, uint64 _M_supremum = 0x100000000ULL)
|
||||
: _M_mt(__seed), _M_supremum(_M_supremum),
|
||||
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
|
||||
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
|
||||
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
|
||||
__cache(0), __bits_left(0) { }
|
||||
|
||||
/** @brief Generate unsigned random 32-bit integer. */
|
||||
uint32
|
||||
operator()()
|
||||
{ return scale_down(mt(), supremum, supremum_reciprocal); }
|
||||
{ return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }
|
||||
|
||||
/** @brief Generate unsigned random 32-bit integer in the
|
||||
interval @c [0,local_supremum). */
|
||||
interval @c [0,local_supremum). */
|
||||
uint32
|
||||
operator()(uint64 local_supremum)
|
||||
{
|
||||
return scale_down(mt(), local_supremum,
|
||||
double(local_supremum * RAND_SUP_REC));
|
||||
return __scale_down(_M_mt(), local_supremum,
|
||||
double(local_supremum * _RAND_SUP_REC));
|
||||
}
|
||||
|
||||
/** @brief Generate a number of random bits, run-time parameter.
|
||||
* @param bits Number of bits to generate. */
|
||||
unsigned long
|
||||
genrand_bits(int bits)
|
||||
__genrand_bits(int bits)
|
||||
{
|
||||
unsigned long res = cache & ((1 << bits) - 1);
|
||||
cache = cache >> bits;
|
||||
bits_left -= bits;
|
||||
if (bits_left < 32)
|
||||
unsigned long __res = __cache & ((1 << bits) - 1);
|
||||
__cache = __cache >> bits;
|
||||
__bits_left -= bits;
|
||||
if (__bits_left < 32)
|
||||
{
|
||||
cache |= ((uint64(mt())) << bits_left);
|
||||
bits_left += 32;
|
||||
__cache |= ((uint64(_M_mt())) << __bits_left);
|
||||
__bits_left += 32;
|
||||
}
|
||||
return res;
|
||||
return __res;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -41,477 +41,477 @@ namespace __gnu_parallel
|
|||
{
|
||||
/** @brief Type to hold the index of a bin.
|
||||
*
|
||||
* Since many variables of this type are allocated, it should be
|
||||
* Since many variables of this type are allocated, it should be
|
||||
* chosen as small as possible.
|
||||
*/
|
||||
typedef unsigned short bin_index;
|
||||
typedef unsigned short _BinIndex;
|
||||
|
||||
/** @brief Data known to every thread participating in
|
||||
__gnu_parallel::parallel_random_shuffle(). */
|
||||
template<typename RandomAccessIterator>
|
||||
struct DRandomShufflingGlobalData
|
||||
__gnu_parallel::__parallel_random_shuffle(). */
|
||||
template<typename _RAIter>
|
||||
struct _DRandomShufflingGlobalData
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
/** @brief Begin iterator of the source. */
|
||||
RandomAccessIterator& source;
|
||||
/** @brief Begin iterator of the source. */
|
||||
_RAIter& _M_source;
|
||||
|
||||
/** @brief Temporary arrays for each thread. */
|
||||
value_type** temporaries;
|
||||
_ValueType** _M_temporaries;
|
||||
|
||||
/** @brief Two-dimensional array to hold the thread-bin distribution.
|
||||
*
|
||||
* Dimensions (num_threads + 1) x (num_bins + 1). */
|
||||
difference_type** dist;
|
||||
* Dimensions (__num_threads + 1) x (_M_num_bins + 1). */
|
||||
_DifferenceType** _M_dist;
|
||||
|
||||
/** @brief Start indexes of the threads' chunks. */
|
||||
difference_type* starts;
|
||||
/** @brief Start indexes of the threads' chunks. */
|
||||
_DifferenceType* _M_starts;
|
||||
|
||||
/** @brief Number of the thread that will further process the
|
||||
corresponding bin. */
|
||||
thread_index_t* bin_proc;
|
||||
_ThreadIndex* _M_bin_proc;
|
||||
|
||||
/** @brief Number of bins to distribute to. */
|
||||
int num_bins;
|
||||
int _M_num_bins;
|
||||
|
||||
/** @brief Number of bits needed to address the bins. */
|
||||
int num_bits;
|
||||
int _M_num_bits;
|
||||
|
||||
/** @brief Constructor. */
|
||||
DRandomShufflingGlobalData(RandomAccessIterator& _source)
|
||||
: source(_source) { }
|
||||
_DRandomShufflingGlobalData(_RAIter& _source)
|
||||
: _M_source(_source) { }
|
||||
};
|
||||
|
||||
/** @brief Local data for a thread participating in
|
||||
__gnu_parallel::parallel_random_shuffle().
|
||||
__gnu_parallel::__parallel_random_shuffle().
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
||||
struct DRSSorterPU
|
||||
template<typename _RAIter, typename RandomNumberGenerator>
|
||||
struct _DRSSorterPU
|
||||
{
|
||||
/** @brief Number of threads participating in total. */
|
||||
int num_threads;
|
||||
int __num_threads;
|
||||
|
||||
/** @brief Begin index for bins taken care of by this thread. */
|
||||
bin_index bins_begin;
|
||||
/** @brief Begin index for bins taken care of by this thread. */
|
||||
_BinIndex _M_bins_begin;
|
||||
|
||||
/** @brief End index for bins taken care of by this thread. */
|
||||
bin_index bins_end;
|
||||
/** @brief End index for bins taken care of by this thread. */
|
||||
_BinIndex __bins_end;
|
||||
|
||||
/** @brief Random seed for this thread. */
|
||||
uint32 seed;
|
||||
/** @brief Random seed for this thread. */
|
||||
uint32 _M_seed;
|
||||
|
||||
/** @brief Pointer to global data. */
|
||||
DRandomShufflingGlobalData<RandomAccessIterator>* sd;
|
||||
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
|
||||
};
|
||||
|
||||
/** @brief Generate a random number in @c [0,2^logp).
|
||||
* @param logp Logarithm (basis 2) of the upper range bound.
|
||||
* @param rng Random number generator to use.
|
||||
/** @brief Generate a random number in @c [0,2^logp).
|
||||
* @param logp Logarithm (basis 2) of the upper range bound.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename RandomNumberGenerator>
|
||||
inline int
|
||||
random_number_pow2(int logp, RandomNumberGenerator& rng)
|
||||
{ return rng.genrand_bits(logp); }
|
||||
__random_number_pow2(int logp, RandomNumberGenerator& __rng)
|
||||
{ return __rng.__genrand_bits(logp); }
|
||||
|
||||
/** @brief Random shuffle code executed by each thread.
|
||||
* @param pus Array of thread-local data records. */
|
||||
template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
||||
* @param __pus Array of thread-local data records. */
|
||||
template<typename _RAIter, typename RandomNumberGenerator>
|
||||
void
|
||||
parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
|
||||
RandomNumberGenerator>* pus)
|
||||
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
|
||||
RandomNumberGenerator>* __pus)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
|
||||
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DRSSorterPU<_RAIter, RandomNumberGenerator>* d = &__pus[__iam];
|
||||
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
|
||||
|
||||
// Indexing: dist[bin][processor]
|
||||
difference_type length = sd->starts[iam + 1] - sd->starts[iam];
|
||||
bin_index* oracles = new bin_index[length];
|
||||
difference_type* dist = new difference_type[sd->num_bins + 1];
|
||||
bin_index* bin_proc = new bin_index[sd->num_bins];
|
||||
value_type** temporaries = new value_type*[d->num_threads];
|
||||
// Indexing: _M_dist[bin][processor]
|
||||
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] - _M_sd->_M_starts[__iam];
|
||||
_BinIndex* __oracles = new _BinIndex[__length];
|
||||
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
|
||||
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
|
||||
_ValueType** _M_temporaries = new _ValueType*[d->__num_threads];
|
||||
|
||||
// Compute oracles and count appearances.
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
dist[b] = 0;
|
||||
int num_bits = sd->num_bits;
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_dist[__b] = 0;
|
||||
int _M_num_bits = _M_sd->_M_num_bits;
|
||||
|
||||
random_number rng(d->seed);
|
||||
_RandomNumber __rng(d->_M_seed);
|
||||
|
||||
// First main loop.
|
||||
for (difference_type i = 0; i < length; ++i)
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
{
|
||||
bin_index oracle = random_number_pow2(num_bits, rng);
|
||||
oracles[i] = oracle;
|
||||
_BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng);
|
||||
__oracles[__i] = __oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
++(dist[oracle + 1]);
|
||||
++(_M_dist[__oracle + 1]);
|
||||
}
|
||||
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
sd->dist[b][iam + 1] = dist[b];
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b];
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
|
||||
// total number of items in bin s
|
||||
for (bin_index s = 0; s < sd->num_bins; ++s)
|
||||
__gnu_sequential::partial_sum(sd->dist[s + 1],
|
||||
sd->dist[s + 1] + d->num_threads + 1,
|
||||
sd->dist[s + 1]);
|
||||
// Sum up bins, _M_sd->_M_dist[__s + 1][d->__num_threads] now contains the
|
||||
// total number of items in bin __s
|
||||
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
|
||||
__gnu_sequential::partial_sum(_M_sd->_M_dist[__s + 1],
|
||||
_M_sd->_M_dist[__s + 1] + d->__num_threads + 1,
|
||||
_M_sd->_M_dist[__s + 1]);
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
sequence_index_t offset = 0, global_offset = 0;
|
||||
for (bin_index s = 0; s < d->bins_begin; ++s)
|
||||
global_offset += sd->dist[s + 1][d->num_threads];
|
||||
_SequenceIndex __offset = 0, __global_offset = 0;
|
||||
for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s)
|
||||
__global_offset += _M_sd->_M_dist[__s + 1][d->__num_threads];
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
|
||||
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
|
||||
{
|
||||
for (int t = 0; t < d->num_threads + 1; ++t)
|
||||
sd->dist[s + 1][t] += offset;
|
||||
offset = sd->dist[s + 1][d->num_threads];
|
||||
for (int __t = 0; __t < d->__num_threads + 1; ++__t)
|
||||
_M_sd->_M_dist[__s + 1][__t] += __offset;
|
||||
__offset = _M_sd->_M_dist[__s + 1][d->__num_threads];
|
||||
}
|
||||
|
||||
sd->temporaries[iam] = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * offset));
|
||||
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
|
||||
::operator new(sizeof(_ValueType) * __offset));
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Draw local copies to avoid false sharing.
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
dist[b] = sd->dist[b][iam];
|
||||
for (bin_index b = 0; b < sd->num_bins; ++b)
|
||||
bin_proc[b] = sd->bin_proc[b];
|
||||
for (thread_index_t t = 0; t < d->num_threads; ++t)
|
||||
temporaries[t] = sd->temporaries[t];
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_dist[__b] = _M_sd->_M_dist[__b][__iam];
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b)
|
||||
_M_bin_proc[__b] = _M_sd->_M_bin_proc[__b];
|
||||
for (_ThreadIndex __t = 0; __t < d->__num_threads; ++__t)
|
||||
_M_temporaries[__t] = _M_sd->_M_temporaries[__t];
|
||||
|
||||
RandomAccessIterator source = sd->source;
|
||||
difference_type start = sd->starts[iam];
|
||||
_RAIter _M_source = _M_sd->_M_source;
|
||||
_DifferenceType __start = _M_sd->_M_starts[__iam];
|
||||
|
||||
// Distribute according to oracles, second main loop.
|
||||
for (difference_type i = 0; i < length; ++i)
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
{
|
||||
bin_index target_bin = oracles[i];
|
||||
thread_index_t target_p = bin_proc[target_bin];
|
||||
_BinIndex target_bin = __oracles[__i];
|
||||
_ThreadIndex target_p = _M_bin_proc[target_bin];
|
||||
|
||||
// Last column [d->num_threads] stays unchanged.
|
||||
::new(&(temporaries[target_p][dist[target_bin + 1]++]))
|
||||
value_type(*(source + i + start));
|
||||
// Last column [d->__num_threads] stays unchanged.
|
||||
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
|
||||
_ValueType(*(_M_source + __i + __start));
|
||||
}
|
||||
|
||||
delete[] oracles;
|
||||
delete[] dist;
|
||||
delete[] bin_proc;
|
||||
delete[] temporaries;
|
||||
delete[] __oracles;
|
||||
delete[] _M_dist;
|
||||
delete[] _M_bin_proc;
|
||||
delete[] _M_temporaries;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Shuffle bins internally.
|
||||
for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
|
||||
for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b)
|
||||
{
|
||||
value_type* begin =
|
||||
sd->temporaries[iam] +
|
||||
((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
|
||||
* end =
|
||||
sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
|
||||
sequential_random_shuffle(begin, end, rng);
|
||||
std::copy(begin, end, sd->source + global_offset +
|
||||
((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
|
||||
_ValueType* __begin =
|
||||
_M_sd->_M_temporaries[__iam] +
|
||||
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->__num_threads]),
|
||||
* __end =
|
||||
_M_sd->_M_temporaries[__iam] + _M_sd->_M_dist[__b + 1][d->__num_threads];
|
||||
__sequential_random_shuffle(__begin, __end, __rng);
|
||||
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
|
||||
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->__num_threads]));
|
||||
}
|
||||
|
||||
::operator delete(sd->temporaries[iam]);
|
||||
::operator delete(_M_sd->_M_temporaries[__iam]);
|
||||
}
|
||||
|
||||
/** @brief Round up to the next greater power of 2.
|
||||
* @param x Integer to round up */
|
||||
template<typename T>
|
||||
T
|
||||
round_up_to_pow2(T x)
|
||||
* @param __x Integer to round up */
|
||||
template<typename _Tp>
|
||||
_Tp
|
||||
__round_up_to_pow2(_Tp __x)
|
||||
{
|
||||
if (x <= 1)
|
||||
if (__x <= 1)
|
||||
return 1;
|
||||
else
|
||||
return (T)1 << (__log2(x - 1) + 1);
|
||||
return (_Tp)1 << (__log2(__x - 1) + 1);
|
||||
}
|
||||
|
||||
/** @brief Main parallel random shuffle step.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param n Length of sequence.
|
||||
* @param num_threads Number of threads to use.
|
||||
* @param rng Random number generator to use.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __n Length of sequence.
|
||||
* @param __num_threads Number of threads to use.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
||||
template<typename _RAIter, typename RandomNumberGenerator>
|
||||
void
|
||||
parallel_random_shuffle_drs(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
__parallel_random_shuffle_drs(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::difference_type n,
|
||||
thread_index_t num_threads,
|
||||
RandomNumberGenerator& rng)
|
||||
<_RAIter>::difference_type __n,
|
||||
_ThreadIndex __num_threads,
|
||||
RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL(n)
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
bin_index num_bins, num_bins_cache;
|
||||
_BinIndex _M_num_bins, __num_bins_cache;
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
// Try the L1 cache first.
|
||||
|
||||
// Must fit into L1.
|
||||
num_bins_cache = std::max<difference_type>(
|
||||
1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
__num_bins_cache = std::max<_DifferenceType>(
|
||||
1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
num_bins = std::min<difference_type>(n, num_bins_cache);
|
||||
_M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
||||
_M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
|
||||
if (num_bins < num_bins_cache)
|
||||
if (_M_num_bins < __num_bins_cache)
|
||||
{
|
||||
#endif
|
||||
// Now try the L2 cache
|
||||
// Must fit into L2
|
||||
num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
|
||||
1, n / (__s.L2_cache_size / sizeof(value_type))));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
__num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>(
|
||||
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2.
|
||||
num_bins = static_cast<bin_index>(
|
||||
std::min(n, static_cast<difference_type>(num_bins_cache)));
|
||||
_M_num_bins = static_cast<_BinIndex>(
|
||||
std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
num_bins = std::min(
|
||||
static_cast<difference_type>(__s.TLB_size / 2), num_bins);
|
||||
_M_num_bins = std::min(
|
||||
static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
}
|
||||
#endif
|
||||
|
||||
num_threads = std::min<bin_index>(num_threads, num_bins);
|
||||
__num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins);
|
||||
|
||||
if (num_threads <= 1)
|
||||
return sequential_random_shuffle(begin, end, rng);
|
||||
if (__num_threads <= 1)
|
||||
return __sequential_random_shuffle(__begin, __end, __rng);
|
||||
|
||||
DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
|
||||
DRSSorterPU<RandomAccessIterator, random_number >* pus;
|
||||
difference_type* starts;
|
||||
_DRandomShufflingGlobalData<_RAIter> _M_sd(__begin);
|
||||
_DRSSorterPU<_RAIter, _RandomNumber >* __pus;
|
||||
_DifferenceType* _M_starts;
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
thread_index_t num_threads = omp_get_num_threads();
|
||||
_ThreadIndex __num_threads = omp_get_num_threads();
|
||||
# pragma omp single
|
||||
{
|
||||
pus = new DRSSorterPU<RandomAccessIterator, random_number>
|
||||
[num_threads];
|
||||
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>
|
||||
[__num_threads];
|
||||
|
||||
sd.temporaries = new value_type*[num_threads];
|
||||
sd.dist = new difference_type*[num_bins + 1];
|
||||
sd.bin_proc = new thread_index_t[num_bins];
|
||||
for (bin_index b = 0; b < num_bins + 1; ++b)
|
||||
sd.dist[b] = new difference_type[num_threads + 1];
|
||||
for (bin_index b = 0; b < (num_bins + 1); ++b)
|
||||
_M_sd._M_temporaries = new _ValueType*[__num_threads];
|
||||
_M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1];
|
||||
_M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins];
|
||||
for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
_M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
|
||||
for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b)
|
||||
{
|
||||
sd.dist[0][0] = 0;
|
||||
sd.dist[b][0] = 0;
|
||||
_M_sd._M_dist[0][0] = 0;
|
||||
_M_sd._M_dist[__b][0] = 0;
|
||||
}
|
||||
starts = sd.starts = new difference_type[num_threads + 1];
|
||||
_M_starts = _M_sd._M_starts = new _DifferenceType[__num_threads + 1];
|
||||
int bin_cursor = 0;
|
||||
sd.num_bins = num_bins;
|
||||
sd.num_bits = __log2(num_bins);
|
||||
_M_sd._M_num_bins = _M_num_bins;
|
||||
_M_sd._M_num_bits = __log2(_M_num_bins);
|
||||
|
||||
difference_type chunk_length = n / num_threads,
|
||||
split = n % num_threads, start = 0;
|
||||
difference_type bin_chunk_length = num_bins / num_threads,
|
||||
bin_split = num_bins % num_threads;
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
_DifferenceType __chunk_length = __n / __num_threads,
|
||||
__split = __n % __num_threads, __start = 0;
|
||||
_DifferenceType bin_chunk_length = _M_num_bins / __num_threads,
|
||||
bin_split = _M_num_bins % __num_threads;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
starts[i] = start;
|
||||
start += (i < split) ? (chunk_length + 1) : chunk_length;
|
||||
int j = pus[i].bins_begin = bin_cursor;
|
||||
_M_starts[__i] = __start;
|
||||
__start += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
|
||||
int __j = __pus[__i]._M_bins_begin = bin_cursor;
|
||||
|
||||
// Range of bins for this processor.
|
||||
bin_cursor += (i < bin_split) ?
|
||||
bin_cursor += (__i < bin_split) ?
|
||||
(bin_chunk_length + 1) : bin_chunk_length;
|
||||
pus[i].bins_end = bin_cursor;
|
||||
for (; j < bin_cursor; ++j)
|
||||
sd.bin_proc[j] = i;
|
||||
pus[i].num_threads = num_threads;
|
||||
pus[i].seed = rng(std::numeric_limits<uint32>::max());
|
||||
pus[i].sd = &sd;
|
||||
__pus[__i].__bins_end = bin_cursor;
|
||||
for (; __j < bin_cursor; ++__j)
|
||||
_M_sd._M_bin_proc[__j] = __i;
|
||||
__pus[__i].__num_threads = __num_threads;
|
||||
__pus[__i]._M_seed = __rng(std::numeric_limits<uint32>::max());
|
||||
__pus[__i]._M_sd = &_M_sd;
|
||||
}
|
||||
starts[num_threads] = start;
|
||||
_M_starts[__num_threads] = __start;
|
||||
} //single
|
||||
// Now shuffle in parallel.
|
||||
parallel_random_shuffle_drs_pu(pus);
|
||||
__parallel_random_shuffle_drs_pu(__pus);
|
||||
} // parallel
|
||||
|
||||
delete[] starts;
|
||||
delete[] sd.bin_proc;
|
||||
for (int s = 0; s < (num_bins + 1); ++s)
|
||||
delete[] sd.dist[s];
|
||||
delete[] sd.dist;
|
||||
delete[] sd.temporaries;
|
||||
delete[] _M_starts;
|
||||
delete[] _M_sd._M_bin_proc;
|
||||
for (int __s = 0; __s < (_M_num_bins + 1); ++__s)
|
||||
delete[] _M_sd._M_dist[__s];
|
||||
delete[] _M_sd._M_dist;
|
||||
delete[] _M_sd._M_temporaries;
|
||||
|
||||
delete[] pus;
|
||||
delete[] __pus;
|
||||
}
|
||||
|
||||
/** @brief Sequential cache-efficient random shuffle.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param rng Random number generator to use.
|
||||
/** @brief Sequential cache-efficient random shuffle.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
||||
template<typename _RAIter, typename RandomNumberGenerator>
|
||||
void
|
||||
sequential_random_shuffle(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
RandomNumberGenerator& rng)
|
||||
__sequential_random_shuffle(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type n = end - begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
bin_index num_bins, num_bins_cache;
|
||||
_BinIndex _M_num_bins, __num_bins_cache;
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
// Try the L1 cache first, must fit into L1.
|
||||
num_bins_cache =
|
||||
std::max<difference_type>
|
||||
(1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
__num_bins_cache =
|
||||
std::max<_DifferenceType>
|
||||
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size
|
||||
num_bins = std::min(n, (difference_type)num_bins_cache);
|
||||
_M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
|
||||
_M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
|
||||
if (num_bins < num_bins_cache)
|
||||
if (_M_num_bins < __num_bins_cache)
|
||||
{
|
||||
#endif
|
||||
// Now try the L2 cache, must fit into L2.
|
||||
num_bins_cache =
|
||||
static_cast<bin_index>(std::max<difference_type>(
|
||||
1, n / (__s.L2_cache_size / sizeof(value_type))));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
__num_bins_cache =
|
||||
static_cast<_BinIndex>(std::max<_DifferenceType>(
|
||||
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
num_bins = static_cast<bin_index>
|
||||
(std::min(n, static_cast<difference_type>(num_bins_cache)));
|
||||
_M_num_bins = static_cast<_BinIndex>
|
||||
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
num_bins =
|
||||
std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
||||
_M_num_bins =
|
||||
std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
}
|
||||
#endif
|
||||
|
||||
int num_bits = __log2(num_bins);
|
||||
int _M_num_bits = __log2(_M_num_bins);
|
||||
|
||||
if (num_bins > 1)
|
||||
if (_M_num_bins > 1)
|
||||
{
|
||||
value_type* target = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * n));
|
||||
bin_index* oracles = new bin_index[n];
|
||||
difference_type* dist0 = new difference_type[num_bins + 1],
|
||||
* dist1 = new difference_type[num_bins + 1];
|
||||
_ValueType* __target = static_cast<_ValueType*>(
|
||||
::operator new(sizeof(_ValueType) * __n));
|
||||
_BinIndex* __oracles = new _BinIndex[__n];
|
||||
_DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1],
|
||||
* __dist1 = new _DifferenceType[_M_num_bins + 1];
|
||||
|
||||
for (int b = 0; b < num_bins + 1; ++b)
|
||||
dist0[b] = 0;
|
||||
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
__dist0[__b] = 0;
|
||||
|
||||
random_number bitrng(rng(0xFFFFFFFF));
|
||||
_RandomNumber bitrng(__rng(0xFFFFFFFF));
|
||||
|
||||
for (difference_type i = 0; i < n; ++i)
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
{
|
||||
bin_index oracle = random_number_pow2(num_bits, bitrng);
|
||||
oracles[i] = oracle;
|
||||
_BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng);
|
||||
__oracles[__i] = __oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
++(dist0[oracle + 1]);
|
||||
++(__dist0[__oracle + 1]);
|
||||
}
|
||||
|
||||
// Sum up bins.
|
||||
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
|
||||
__gnu_sequential::partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
|
||||
|
||||
for (int b = 0; b < num_bins + 1; ++b)
|
||||
dist1[b] = dist0[b];
|
||||
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
__dist1[__b] = __dist0[__b];
|
||||
|
||||
// Distribute according to oracles.
|
||||
for (difference_type i = 0; i < n; ++i)
|
||||
::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
::new(&(__target[(__dist0[__oracles[__i]])++])) _ValueType(*(__begin + __i));
|
||||
|
||||
for (int b = 0; b < num_bins; ++b)
|
||||
for (int __b = 0; __b < _M_num_bins; ++__b)
|
||||
{
|
||||
sequential_random_shuffle(target + dist1[b],
|
||||
target + dist1[b + 1],
|
||||
rng);
|
||||
__sequential_random_shuffle(__target + __dist1[__b],
|
||||
__target + __dist1[__b + 1],
|
||||
__rng);
|
||||
}
|
||||
|
||||
// Copy elements back.
|
||||
std::copy(target, target + n, begin);
|
||||
std::copy(__target, __target + __n, __begin);
|
||||
|
||||
delete[] dist0;
|
||||
delete[] dist1;
|
||||
delete[] oracles;
|
||||
::operator delete(target);
|
||||
delete[] __dist0;
|
||||
delete[] __dist1;
|
||||
delete[] __oracles;
|
||||
::operator delete(__target);
|
||||
}
|
||||
else
|
||||
__gnu_sequential::random_shuffle(begin, end, rng);
|
||||
__gnu_sequential::random_shuffle(__begin, __end, __rng);
|
||||
}
|
||||
|
||||
/** @brief Parallel random public call.
|
||||
* @param begin Begin iterator of sequence.
|
||||
* @param end End iterator of sequence.
|
||||
* @param rng Random number generator to use.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
||||
template<typename _RAIter, typename RandomNumberGenerator>
|
||||
inline void
|
||||
parallel_random_shuffle(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
RandomNumberGenerator rng = random_number())
|
||||
__parallel_random_shuffle(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
RandomNumberGenerator __rng = _RandomNumber())
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
difference_type n = end - begin;
|
||||
parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
__parallel_random_shuffle_drs(__begin, __end, __n, __get_max_threads(), __rng) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file parallel/search.h
|
||||
* @brief Parallel implementation base for std::search() and
|
||||
* @brief Parallel implementation base for std::search() and
|
||||
* std::search_n().
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*/
|
||||
|
|
@ -42,130 +42,130 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
/**
|
||||
* @brief Precalculate advances for Knuth-Morris-Pratt algorithm.
|
||||
* @param elements Begin iterator of sequence to search for.
|
||||
* @param length Length of sequence to search for.
|
||||
* @param advances Returned offsets.
|
||||
* @brief Precalculate advances for Knuth-Morris-Pratt algorithm.
|
||||
* @param __elements Begin iterator of sequence to search for.
|
||||
* @param __length Length of sequence to search for.
|
||||
* @param __off Returned offsets.
|
||||
*/
|
||||
template<typename RandomAccessIterator, typename _DifferenceTp>
|
||||
template<typename _RAIter, typename _DifferenceTp>
|
||||
void
|
||||
calc_borders(RandomAccessIterator elements, _DifferenceTp length,
|
||||
_DifferenceTp* off)
|
||||
__calc_borders(_RAIter __elements, _DifferenceTp __length,
|
||||
_DifferenceTp* __off)
|
||||
{
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
off[0] = -1;
|
||||
if (length > 1)
|
||||
off[1] = 0;
|
||||
difference_type k = 0;
|
||||
for (difference_type j = 2; j <= length; j++)
|
||||
__off[0] = -1;
|
||||
if (__length > 1)
|
||||
__off[1] = 0;
|
||||
_DifferenceType __k = 0;
|
||||
for (_DifferenceType __j = 2; __j <= __length; __j++)
|
||||
{
|
||||
while ((k >= 0) && !(elements[k] == elements[j-1]))
|
||||
k = off[k];
|
||||
off[j] = ++k;
|
||||
while ((__k >= 0) && !(__elements[__k] == __elements[__j-1]))
|
||||
__k = __off[__k];
|
||||
__off[__j] = ++__k;
|
||||
}
|
||||
}
|
||||
|
||||
// Generic parallel find algorithm (requires random access iterator).
|
||||
|
||||
/** @brief Parallel std::search.
|
||||
* @param begin1 Begin iterator of first sequence.
|
||||
* @param end1 End iterator of first sequence.
|
||||
* @param begin2 Begin iterator of second sequence.
|
||||
* @param end2 End iterator of second sequence.
|
||||
* @param pred Find predicate.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @return Place of finding in first sequence. */
|
||||
template<typename _RandomAccessIterator1,
|
||||
typename _RandomAccessIterator2,
|
||||
typename Pred>
|
||||
_RandomAccessIterator1
|
||||
search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
|
||||
_RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
|
||||
Pred pred)
|
||||
template<typename __RAIter1,
|
||||
typename __RAIter2,
|
||||
typename _Pred>
|
||||
__RAIter1
|
||||
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
|
||||
__RAIter2 __begin2, __RAIter2 __end2,
|
||||
_Pred __pred)
|
||||
{
|
||||
typedef std::iterator_traits<_RandomAccessIterator1> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<__RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2));
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
|
||||
|
||||
difference_type pattern_length = end2 - begin2;
|
||||
_DifferenceType __pattern_length = __end2 - __begin2;
|
||||
|
||||
// Pattern too short.
|
||||
if(pattern_length <= 0)
|
||||
return end1;
|
||||
if(__pattern_length <= 0)
|
||||
return __end1;
|
||||
|
||||
// Last point to start search.
|
||||
difference_type input_length = (end1 - begin1) - pattern_length;
|
||||
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
|
||||
|
||||
// Where is first occurrence of pattern? defaults to end.
|
||||
difference_type result = (end1 - begin1);
|
||||
difference_type *splitters;
|
||||
_DifferenceType __result = (__end1 - __begin1);
|
||||
_DifferenceType *__splitters;
|
||||
|
||||
// Pattern too long.
|
||||
if (input_length < 0)
|
||||
return end1;
|
||||
if (__input_length < 0)
|
||||
return __end1;
|
||||
|
||||
omp_lock_t result_lock;
|
||||
omp_init_lock(&result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
thread_index_t num_threads =
|
||||
std::max<difference_type>(1,
|
||||
std::min<difference_type>(input_length, get_max_threads()));
|
||||
_ThreadIndex __num_threads =
|
||||
std::max<_DifferenceType>(1,
|
||||
std::min<_DifferenceType>(__input_length, __get_max_threads()));
|
||||
|
||||
difference_type advances[pattern_length];
|
||||
calc_borders(begin2, pattern_length, advances);
|
||||
_DifferenceType __advances[__pattern_length];
|
||||
__calc_borders(__begin2, __pattern_length, __advances);
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
splitters = new difference_type[num_threads + 1];
|
||||
equally_split(input_length, num_threads, splitters);
|
||||
__num_threads = omp_get_num_threads();
|
||||
__splitters = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__input_length, __num_threads, __splitters);
|
||||
}
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
difference_type start = splitters[iam], stop = splitters[iam + 1];
|
||||
_DifferenceType __start = __splitters[__iam], __stop = __splitters[__iam + 1];
|
||||
|
||||
difference_type pos_in_pattern = 0;
|
||||
bool found_pattern = false;
|
||||
_DifferenceType __pos_in_pattern = 0;
|
||||
bool __found_pattern = false;
|
||||
|
||||
while (start <= stop && !found_pattern)
|
||||
while (__start <= __stop && !__found_pattern)
|
||||
{
|
||||
// Get new value of result.
|
||||
#pragma omp flush(result)
|
||||
#pragma omp flush(__result)
|
||||
// No chance for this thread to find first occurrence.
|
||||
if (result < start)
|
||||
if (__result < __start)
|
||||
break;
|
||||
while (pred(begin1[start + pos_in_pattern],
|
||||
begin2[pos_in_pattern]))
|
||||
while (__pred(__begin1[__start + __pos_in_pattern],
|
||||
__begin2[__pos_in_pattern]))
|
||||
{
|
||||
++pos_in_pattern;
|
||||
if (pos_in_pattern == pattern_length)
|
||||
++__pos_in_pattern;
|
||||
if (__pos_in_pattern == __pattern_length)
|
||||
{
|
||||
// Found new candidate for result.
|
||||
omp_set_lock(&result_lock);
|
||||
result = std::min(result, start);
|
||||
omp_unset_lock(&result_lock);
|
||||
omp_set_lock(&__result_lock);
|
||||
__result = std::min(__result, __start);
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
found_pattern = true;
|
||||
__found_pattern = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Make safe jump.
|
||||
start += (pos_in_pattern - advances[pos_in_pattern]);
|
||||
pos_in_pattern =
|
||||
(advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
|
||||
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
|
||||
__pos_in_pattern =
|
||||
(__advances[__pos_in_pattern] < 0) ? 0 : __advances[__pos_in_pattern];
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
delete[] splitters;
|
||||
delete[] __splitters;
|
||||
|
||||
// Return iterator on found element.
|
||||
return (begin1 + result);
|
||||
return (__begin1 + __result);
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
|
|
|
|||
|
|
@ -41,482 +41,482 @@
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
template<typename InputIterator, typename OutputIterator>
|
||||
OutputIterator
|
||||
copy_tail(std::pair<InputIterator, InputIterator> b,
|
||||
std::pair<InputIterator, InputIterator> e, OutputIterator r)
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
copy_tail(std::pair<_IIter, _IIter> __b,
|
||||
std::pair<_IIter, _IIter> __e, _OutputIterator __r)
|
||||
{
|
||||
if (b.first != e.first)
|
||||
if (__b.first != __e.first)
|
||||
{
|
||||
do
|
||||
{
|
||||
*r++ = *b.first++;
|
||||
*__r++ = *__b.first++;
|
||||
}
|
||||
while (b.first != e.first);
|
||||
while (__b.first != __e.first);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (b.second != e.second)
|
||||
*r++ = *b.second++;
|
||||
while (__b.second != __e.second)
|
||||
*__r++ = *__b.second++;
|
||||
}
|
||||
return r;
|
||||
return __r;
|
||||
}
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct symmetric_difference_func
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
symmetric_difference_func(Comparator c) : comp(c) {}
|
||||
symmetric_difference_func(_Compare __c) : __comp(__c) {}
|
||||
|
||||
Comparator comp;
|
||||
_Compare __comp;
|
||||
|
||||
OutputIterator
|
||||
invoke(InputIterator a, InputIterator b,
|
||||
InputIterator c, InputIterator d,
|
||||
OutputIterator r) const
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
if (__comp(*__a, *__c))
|
||||
{
|
||||
*r = *a;
|
||||
++a;
|
||||
++r;
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
}
|
||||
else if (comp(*c, *a))
|
||||
else if (__comp(*__c, *__a))
|
||||
{
|
||||
*r = *c;
|
||||
++c;
|
||||
++r;
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
else
|
||||
{
|
||||
++a;
|
||||
++c;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(c, d, std::copy(a, b, r));
|
||||
return std::copy(__c, d, std::copy(__a, __b, __r));
|
||||
}
|
||||
|
||||
difference_type
|
||||
count(InputIterator a, InputIterator b,
|
||||
InputIterator c, InputIterator d) const
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
difference_type counter = 0;
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
if (__comp(*__a, *__c))
|
||||
{
|
||||
++a;
|
||||
++counter;
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (comp(*c, *a))
|
||||
else if (__comp(*__c, *__a))
|
||||
{
|
||||
++c;
|
||||
++counter;
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
++a;
|
||||
++c;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
|
||||
return counter + (b - a) + (d - c);
|
||||
return __counter + (__b - __a) + (d - __c);
|
||||
}
|
||||
|
||||
OutputIterator
|
||||
first_empty(InputIterator c, InputIterator d, OutputIterator out) const
|
||||
{ return std::copy(c, d, out); }
|
||||
_OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, d, __out); }
|
||||
|
||||
OutputIterator
|
||||
second_empty(InputIterator a, InputIterator b, OutputIterator out) const
|
||||
{ return std::copy(a, b, out); }
|
||||
_OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
struct difference_func
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __difference_func
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
difference_func(Comparator c) : comp(c) {}
|
||||
__difference_func(_Compare __c) : __comp(__c) {}
|
||||
|
||||
Comparator comp;
|
||||
_Compare __comp;
|
||||
|
||||
OutputIterator
|
||||
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
|
||||
OutputIterator r) const
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
if (__comp(*__a, *__c))
|
||||
{
|
||||
*r = *a;
|
||||
++a;
|
||||
++r;
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
}
|
||||
else if (comp(*c, *a))
|
||||
{ ++c; }
|
||||
else if (__comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++a;
|
||||
++c;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(a, b, r);
|
||||
return std::copy(__a, __b, __r);
|
||||
}
|
||||
|
||||
difference_type
|
||||
count(InputIterator a, InputIterator b,
|
||||
InputIterator c, InputIterator d) const
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
difference_type counter = 0;
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
if (__comp(*__a, *__c))
|
||||
{
|
||||
++a;
|
||||
++counter;
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (comp(*c, *a))
|
||||
{ ++c; }
|
||||
else if (__comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{ ++a; ++c; }
|
||||
{ ++__a; ++__c; }
|
||||
}
|
||||
|
||||
return counter + (b - a);
|
||||
return __counter + (__b - __a);
|
||||
}
|
||||
|
||||
inline OutputIterator
|
||||
first_empty(InputIterator c, InputIterator d, OutputIterator out) const
|
||||
{ return out; }
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
|
||||
inline OutputIterator
|
||||
second_empty(InputIterator a, InputIterator b, OutputIterator out) const
|
||||
{ return std::copy(a, b, out); }
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
struct intersection_func
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __intersection_func
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
intersection_func(Comparator c) : comp(c) {}
|
||||
__intersection_func(_Compare __c) : __comp(__c) {}
|
||||
|
||||
Comparator comp;
|
||||
_Compare __comp;
|
||||
|
||||
OutputIterator
|
||||
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
|
||||
OutputIterator r) const
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
{ ++a; }
|
||||
else if (comp(*c, *a))
|
||||
{ ++c; }
|
||||
if (__comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (__comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
*r = *a;
|
||||
++a;
|
||||
++c;
|
||||
++r;
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
return __r;
|
||||
}
|
||||
|
||||
difference_type
|
||||
count(InputIterator a, InputIterator b,
|
||||
InputIterator c, InputIterator d) const
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
difference_type counter = 0;
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
{ ++a; }
|
||||
else if (comp(*c, *a))
|
||||
{ ++c; }
|
||||
if (__comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (__comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++a;
|
||||
++c;
|
||||
++counter;
|
||||
++__a;
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
}
|
||||
|
||||
return counter;
|
||||
return __counter;
|
||||
}
|
||||
|
||||
inline OutputIterator
|
||||
first_empty(InputIterator c, InputIterator d, OutputIterator out) const
|
||||
{ return out; }
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
|
||||
inline OutputIterator
|
||||
second_empty(InputIterator a, InputIterator b, OutputIterator out) const
|
||||
{ return out; }
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
};
|
||||
|
||||
template<class InputIterator, class OutputIterator, class Comparator>
|
||||
struct union_func
|
||||
template<class _IIter, class _OutputIterator, class _Compare>
|
||||
struct __union_func
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator>::difference_type
|
||||
difference_type;
|
||||
typedef typename std::iterator_traits<_IIter>::difference_type
|
||||
_DifferenceType;
|
||||
|
||||
union_func(Comparator c) : comp(c) {}
|
||||
__union_func(_Compare __c) : __comp(__c) {}
|
||||
|
||||
Comparator comp;
|
||||
_Compare __comp;
|
||||
|
||||
OutputIterator
|
||||
invoke(InputIterator a, const InputIterator b, InputIterator c,
|
||||
const InputIterator d, OutputIterator r) const
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, const _IIter __b, _IIter __c,
|
||||
const _IIter d, _OutputIterator __r) const
|
||||
{
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
if (__comp(*__a, *__c))
|
||||
{
|
||||
*r = *a;
|
||||
++a;
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
}
|
||||
else if (comp(*c, *a))
|
||||
else if (__comp(*__c, *__a))
|
||||
{
|
||||
*r = *c;
|
||||
++c;
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
}
|
||||
else
|
||||
{
|
||||
*r = *a;
|
||||
++a;
|
||||
++c;
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++r;
|
||||
++__r;
|
||||
}
|
||||
return std::copy(c, d, std::copy(a, b, r));
|
||||
return std::copy(__c, d, std::copy(__a, __b, __r));
|
||||
}
|
||||
|
||||
difference_type
|
||||
count(InputIterator a, InputIterator b,
|
||||
InputIterator c, InputIterator d) const
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
difference_type counter = 0;
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (a != b && c != d)
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (comp(*a, *c))
|
||||
{ ++a; }
|
||||
else if (comp(*c, *a))
|
||||
{ ++c; }
|
||||
if (__comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (__comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++a;
|
||||
++c;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++counter;
|
||||
++__counter;
|
||||
}
|
||||
|
||||
counter += (b - a);
|
||||
counter += (d - c);
|
||||
return counter;
|
||||
__counter += (__b - __a);
|
||||
__counter += (d - __c);
|
||||
return __counter;
|
||||
}
|
||||
|
||||
inline OutputIterator
|
||||
first_empty(InputIterator c, InputIterator d, OutputIterator out) const
|
||||
{ return std::copy(c, d, out); }
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, d, __out); }
|
||||
|
||||
inline OutputIterator
|
||||
second_empty(InputIterator a, InputIterator b, OutputIterator out) const
|
||||
{ return std::copy(a, b, out); }
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename Operation>
|
||||
OutputIterator
|
||||
parallel_set_operation(InputIterator begin1, InputIterator end1,
|
||||
InputIterator begin2, InputIterator end2,
|
||||
OutputIterator result, Operation op)
|
||||
_OutputIterator
|
||||
__parallel_set_operation(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, Operation __op)
|
||||
{
|
||||
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2))
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
|
||||
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
if (begin1 == end1)
|
||||
return op.first_empty(begin2, end2, result);
|
||||
if (__begin1 == __end1)
|
||||
return __op.__first_empty(__begin2, __end2, __result);
|
||||
|
||||
if (begin2 == end2)
|
||||
return op.second_empty(begin1, end1, result);
|
||||
if (__begin2 == __end2)
|
||||
return __op.__second_empty(__begin1, __end1, __result);
|
||||
|
||||
const difference_type size = (end1 - begin1) + (end2 - begin2);
|
||||
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
|
||||
|
||||
const iterator_pair sequence[ 2 ] =
|
||||
{ std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ;
|
||||
OutputIterator return_value = result;
|
||||
difference_type *borders;
|
||||
iterator_pair *block_begins;
|
||||
difference_type* lengths;
|
||||
const _IteratorPair __sequence[ 2 ] =
|
||||
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) } ;
|
||||
_OutputIterator return_value = __result;
|
||||
_DifferenceType *__borders;
|
||||
_IteratorPair *__block_begins;
|
||||
_DifferenceType* __lengths;
|
||||
|
||||
thread_index_t num_threads =
|
||||
std::min<difference_type>(get_max_threads(),
|
||||
std::min(end1 - begin1, end2 - begin2));
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(),
|
||||
std::min(__end1 - __begin1, __end2 - __begin2));
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
borders = new difference_type[num_threads + 2];
|
||||
equally_split(size, num_threads + 1, borders);
|
||||
block_begins = new iterator_pair[num_threads + 1];
|
||||
// Very start.
|
||||
block_begins[0] = std::make_pair(begin1, begin2);
|
||||
lengths = new difference_type[num_threads];
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(size, __num_threads + 1, __borders);
|
||||
__block_begins = new _IteratorPair[__num_threads + 1];
|
||||
// Very __start.
|
||||
__block_begins[0] = std::make_pair(__begin1, __begin2);
|
||||
__lengths = new _DifferenceType[__num_threads];
|
||||
} //single
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// Result from multiseq_partition.
|
||||
InputIterator offset[2];
|
||||
const difference_type rank = borders[iam + 1];
|
||||
// _Result from multiseq_partition.
|
||||
_IIter __offset[2];
|
||||
const _DifferenceType __rank = __borders[__iam + 1];
|
||||
|
||||
multiseq_partition(sequence, sequence + 2, rank, offset, op.comp);
|
||||
multiseq_partition(__sequence, __sequence + 2, __rank, __offset, __op.__comp);
|
||||
|
||||
// allowed to read?
|
||||
// together
|
||||
// *(offset[ 0 ] - 1) == *offset[ 1 ]
|
||||
if (offset[ 0 ] != begin1 && offset[ 1 ] != end2
|
||||
&& !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ])
|
||||
&& !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1)))
|
||||
// *(__offset[ 0 ] - 1) == *__offset[ 1 ]
|
||||
if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2
|
||||
&& !__op.__comp(*(__offset[ 0 ] - 1), *__offset[ 1 ])
|
||||
&& !__op.__comp(*__offset[ 1 ], *(__offset[ 0 ] - 1)))
|
||||
{
|
||||
// Avoid split between globally equal elements: move one to
|
||||
// front in first sequence.
|
||||
--offset[ 0 ];
|
||||
--__offset[ 0 ];
|
||||
}
|
||||
|
||||
iterator_pair block_end = block_begins[ iam + 1 ] =
|
||||
iterator_pair(offset[ 0 ], offset[ 1 ]);
|
||||
_IteratorPair block_end = __block_begins[ __iam + 1 ] =
|
||||
_IteratorPair(__offset[ 0 ], __offset[ 1 ]);
|
||||
|
||||
// Make sure all threads have their block_begin result written out.
|
||||
# pragma omp barrier
|
||||
|
||||
iterator_pair block_begin = block_begins[ iam ];
|
||||
_IteratorPair __block_begin = __block_begins[ __iam ];
|
||||
|
||||
// Begin working for the first block, while the others except
|
||||
// the last start to count.
|
||||
if (iam == 0)
|
||||
if (__iam == 0)
|
||||
{
|
||||
// The first thread can copy already.
|
||||
lengths[ iam ] = op.invoke(block_begin.first, block_end.first,
|
||||
block_begin.second, block_end.second,
|
||||
result)
|
||||
- result;
|
||||
__lengths[ __iam ] = __op._M_invoke(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second,
|
||||
__result)
|
||||
- __result;
|
||||
}
|
||||
else
|
||||
{
|
||||
lengths[ iam ] = op.count(block_begin.first, block_end.first,
|
||||
block_begin.second, block_end.second);
|
||||
__lengths[ __iam ] = __op.__count(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second);
|
||||
}
|
||||
|
||||
// Make sure everyone wrote their lengths.
|
||||
# pragma omp barrier
|
||||
|
||||
OutputIterator r = result;
|
||||
_OutputIterator __r = __result;
|
||||
|
||||
if (iam == 0)
|
||||
if (__iam == 0)
|
||||
{
|
||||
// Do the last block.
|
||||
for (int i = 0; i < num_threads; ++i)
|
||||
r += lengths[i];
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
__r += __lengths[__i];
|
||||
|
||||
block_begin = block_begins[num_threads];
|
||||
__block_begin = __block_begins[__num_threads];
|
||||
|
||||
// Return the result iterator of the last block.
|
||||
return_value = op.invoke(
|
||||
block_begin.first, end1, block_begin.second, end2, r);
|
||||
return_value = __op._M_invoke(
|
||||
__block_begin.first, __end1, __block_begin.second, __end2, __r);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < iam; ++i)
|
||||
r += lengths[ i ];
|
||||
for (int __i = 0; __i < __iam; ++__i)
|
||||
__r += __lengths[ __i ];
|
||||
|
||||
// Reset begins for copy pass.
|
||||
op.invoke(block_begin.first, block_end.first,
|
||||
block_begin.second, block_end.second, r);
|
||||
__op._M_invoke(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second, __r);
|
||||
}
|
||||
}
|
||||
return return_value;
|
||||
}
|
||||
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
inline OutputIterator
|
||||
parallel_set_union(InputIterator begin1, InputIterator end1,
|
||||
InputIterator begin2, InputIterator end2,
|
||||
OutputIterator result, Comparator comp)
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_union(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return parallel_set_operation(begin1, end1, begin2, end2, result,
|
||||
union_func< InputIterator, OutputIterator, Comparator>(comp));
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
|
||||
__union_func< _IIter, _OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
inline OutputIterator
|
||||
parallel_set_intersection(InputIterator begin1, InputIterator end1,
|
||||
InputIterator begin2, InputIterator end2,
|
||||
OutputIterator result, Comparator comp)
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return parallel_set_operation(begin1, end1, begin2, end2, result,
|
||||
intersection_func<InputIterator, OutputIterator, Comparator>(comp));
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
|
||||
__intersection_func<_IIter, _OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
inline OutputIterator
|
||||
parallel_set_difference(InputIterator begin1, InputIterator end1,
|
||||
InputIterator begin2, InputIterator end2,
|
||||
OutputIterator result, Comparator comp)
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return parallel_set_operation(begin1, end1, begin2, end2, result,
|
||||
difference_func<InputIterator, OutputIterator, Comparator>(comp));
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
|
||||
__difference_func<_IIter, _OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename InputIterator,
|
||||
typename OutputIterator,
|
||||
typename Comparator>
|
||||
inline OutputIterator
|
||||
parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1,
|
||||
InputIterator begin2, InputIterator end2,
|
||||
OutputIterator result, Comparator comp)
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return parallel_set_operation(begin1, end1, begin2, end2, result,
|
||||
symmetric_difference_func<InputIterator, OutputIterator, Comparator>
|
||||
(comp));
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
|
||||
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
|
||||
(__comp));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@
|
|||
* @section parallelization_decision
|
||||
* The decision whether to run an algorithm in parallel.
|
||||
*
|
||||
* There are several ways the user can switch on and off the parallel
|
||||
* There are several ways the user can switch on and __off the parallel
|
||||
* execution of an algorithm, both at compile- and run-time.
|
||||
*
|
||||
* Only sequential execution can be forced at compile-time. This
|
||||
|
|
@ -46,11 +46,11 @@
|
|||
*
|
||||
* To force sequential execution of an algorithm ultimately at
|
||||
* compile-time, the user must add the tag
|
||||
* __gnu_parallel::sequential_tag() to the end of the parameter list,
|
||||
* gnu_parallel::sequential_tag() to the end of the parameter list,
|
||||
* e. g.
|
||||
*
|
||||
* \code
|
||||
* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
|
||||
* std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag());
|
||||
* \endcode
|
||||
*
|
||||
* This is compatible with all overloaded algorithm variants. No
|
||||
|
|
@ -60,18 +60,18 @@
|
|||
* If the algorithm call is not forced to be executed sequentially
|
||||
* at compile-time, the decision is made at run-time.
|
||||
* The global variable __gnu_parallel::_Settings::algorithm_strategy
|
||||
* is checked. It is a tristate variable corresponding to:
|
||||
* is checked. _It is a tristate variable corresponding to:
|
||||
*
|
||||
* a. force_sequential, meaning the sequential algorithm is executed.
|
||||
* b. force_parallel, meaning the parallel algorithm is executed.
|
||||
* c. heuristic
|
||||
* b. force_parallel, meaning the parallel algorithm is executed.
|
||||
* c. heuristic
|
||||
*
|
||||
* For heuristic, the parallel algorithm implementation is called
|
||||
* only if the input size is sufficiently large. For most
|
||||
* algorithms, the input size is the (combined) length of the input
|
||||
* sequence(s). The threshold can be set by the user, individually
|
||||
* sequence(__s). The threshold can be set by the user, individually
|
||||
* for each algorithm. The according variables are called
|
||||
* __gnu_parallel::_Settings::[algorithm]_minimal_n .
|
||||
* gnu_parallel::_Settings::[algorithm]_minimal_n .
|
||||
*
|
||||
* For some of the algorithms, there are even more tuning options,
|
||||
* e. g. the ability to choose from multiple algorithm variants. See
|
||||
|
|
@ -88,24 +88,24 @@
|
|||
/**
|
||||
* @brief Determine at compile(?)-time if the parallel variant of an
|
||||
* algorithm should be called.
|
||||
* @param c A condition that is convertible to bool that is overruled by
|
||||
* @param __c A condition that is convertible to bool that is overruled by
|
||||
* __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
|
||||
* based on the input size.
|
||||
*/
|
||||
#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
|
||||
#define _GLIBCXX_PARALLEL_CONDITION(__c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
|
||||
|
||||
/*
|
||||
inline bool
|
||||
parallel_condition(bool c)
|
||||
parallel_condition(bool __c)
|
||||
{
|
||||
bool ret = false;
|
||||
const _Settings& s = _Settings::get();
|
||||
if (s.algorithm_strategy != force_seqential)
|
||||
const _Settings& __s = _Settings::get();
|
||||
if (__s.algorithm_strategy != force_seqential)
|
||||
{
|
||||
if (s.algorithm_strategy == force_parallel)
|
||||
if (__s.algorithm_strategy == force_parallel)
|
||||
ret = true;
|
||||
else
|
||||
ret = get_max_threads() > 1 && c;
|
||||
ret = __get_max_threads() > 1 && __c;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -131,49 +131,49 @@ namespace __gnu_parallel
|
|||
// Per-algorithm settings.
|
||||
|
||||
/// Minimal input size for accumulate.
|
||||
sequence_index_t accumulate_minimal_n;
|
||||
_SequenceIndex accumulate_minimal_n;
|
||||
|
||||
/// Minimal input size for adjacent_difference.
|
||||
unsigned int adjacent_difference_minimal_n;
|
||||
|
||||
/// Minimal input size for count and count_if.
|
||||
sequence_index_t count_minimal_n;
|
||||
_SequenceIndex count_minimal_n;
|
||||
|
||||
/// Minimal input size for fill.
|
||||
sequence_index_t fill_minimal_n;
|
||||
_SequenceIndex fill_minimal_n;
|
||||
|
||||
/// Block size increase factor for find.
|
||||
double find_increasing_factor;
|
||||
|
||||
/// Initial block size for find.
|
||||
sequence_index_t find_initial_block_size;
|
||||
_SequenceIndex find_initial_block_size;
|
||||
|
||||
/// Maximal block size for find.
|
||||
sequence_index_t find_maximum_block_size;
|
||||
_SequenceIndex find_maximum_block_size;
|
||||
|
||||
/// Start with looking for this many elements sequentially, for find.
|
||||
sequence_index_t find_sequential_search_size;
|
||||
_SequenceIndex find_sequential_search_size;
|
||||
|
||||
/// Minimal input size for for_each.
|
||||
sequence_index_t for_each_minimal_n;
|
||||
_SequenceIndex for_each_minimal_n;
|
||||
|
||||
/// Minimal input size for generate.
|
||||
sequence_index_t generate_minimal_n;
|
||||
_SequenceIndex generate_minimal_n;
|
||||
|
||||
/// Minimal input size for max_element.
|
||||
sequence_index_t max_element_minimal_n;
|
||||
_SequenceIndex max_element_minimal_n;
|
||||
|
||||
/// Minimal input size for merge.
|
||||
sequence_index_t merge_minimal_n;
|
||||
_SequenceIndex merge_minimal_n;
|
||||
|
||||
/// Oversampling factor for merge.
|
||||
unsigned int merge_oversampling;
|
||||
|
||||
/// Minimal input size for min_element.
|
||||
sequence_index_t min_element_minimal_n;
|
||||
_SequenceIndex min_element_minimal_n;
|
||||
|
||||
/// Minimal input size for multiway_merge.
|
||||
sequence_index_t multiway_merge_minimal_n;
|
||||
_SequenceIndex multiway_merge_minimal_n;
|
||||
|
||||
/// Oversampling factor for multiway_merge.
|
||||
int multiway_merge_minimal_k;
|
||||
|
|
@ -182,22 +182,22 @@ namespace __gnu_parallel
|
|||
unsigned int multiway_merge_oversampling;
|
||||
|
||||
/// Minimal input size for nth_element.
|
||||
sequence_index_t nth_element_minimal_n;
|
||||
_SequenceIndex nth_element_minimal_n;
|
||||
|
||||
/// Chunk size for partition.
|
||||
sequence_index_t partition_chunk_size;
|
||||
_SequenceIndex partition_chunk_size;
|
||||
|
||||
/// Chunk size for partition, relative to input size. If > 0.0,
|
||||
/// this value overrides partition_chunk_size.
|
||||
double partition_chunk_share;
|
||||
|
||||
/// Minimal input size for partition.
|
||||
sequence_index_t partition_minimal_n;
|
||||
_SequenceIndex partition_minimal_n;
|
||||
|
||||
/// Minimal input size for partial_sort.
|
||||
sequence_index_t partial_sort_minimal_n;
|
||||
_SequenceIndex partial_sort_minimal_n;
|
||||
|
||||
/// Ratio for partial_sum. Assume "sum and write result" to be
|
||||
/// Ratio for partial_sum. Assume "sum and write __result" to be
|
||||
/// this factor slower than just "sum".
|
||||
float partial_sum_dilation;
|
||||
|
||||
|
|
@ -208,22 +208,22 @@ namespace __gnu_parallel
|
|||
unsigned int random_shuffle_minimal_n;
|
||||
|
||||
/// Minimal input size for replace and replace_if.
|
||||
sequence_index_t replace_minimal_n;
|
||||
_SequenceIndex replace_minimal_n;
|
||||
|
||||
/// Minimal input size for set_difference.
|
||||
sequence_index_t set_difference_minimal_n;
|
||||
_SequenceIndex set_difference_minimal_n;
|
||||
|
||||
/// Minimal input size for set_intersection.
|
||||
sequence_index_t set_intersection_minimal_n;
|
||||
_SequenceIndex set_intersection_minimal_n;
|
||||
|
||||
/// Minimal input size for set_symmetric_difference.
|
||||
sequence_index_t set_symmetric_difference_minimal_n;
|
||||
_SequenceIndex set_symmetric_difference_minimal_n;
|
||||
|
||||
/// Minimal input size for set_union.
|
||||
sequence_index_t set_union_minimal_n;
|
||||
_SequenceIndex set_union_minimal_n;
|
||||
|
||||
/// Minimal input size for parallel sorting.
|
||||
sequence_index_t sort_minimal_n;
|
||||
_SequenceIndex sort_minimal_n;
|
||||
|
||||
/// Oversampling factor for parallel std::sort (MWMS).
|
||||
unsigned int sort_mwms_oversampling;
|
||||
|
|
@ -231,38 +231,38 @@ namespace __gnu_parallel
|
|||
/// Such many samples to take to find a good pivot (quicksort).
|
||||
unsigned int sort_qs_num_samples_preset;
|
||||
|
||||
/// Maximal subsequence length to switch to unbalanced base case.
|
||||
/// Maximal subsequence __length to switch to unbalanced __base case.
|
||||
/// Applies to std::sort with dynamically load-balanced quicksort.
|
||||
sequence_index_t sort_qsb_base_case_maximal_n;
|
||||
_SequenceIndex sort_qsb_base_case_maximal_n;
|
||||
|
||||
/// Minimal input size for parallel std::transform.
|
||||
sequence_index_t transform_minimal_n;
|
||||
_SequenceIndex transform_minimal_n;
|
||||
|
||||
/// Minimal input size for unique_copy.
|
||||
sequence_index_t unique_copy_minimal_n;
|
||||
_SequenceIndex unique_copy_minimal_n;
|
||||
|
||||
sequence_index_t workstealing_chunk_size;
|
||||
_SequenceIndex workstealing_chunk_size;
|
||||
|
||||
// Hardware dependent tuning parameters.
|
||||
|
||||
/// Size of the L1 cache in bytes (underestimation).
|
||||
/// size of the L1 cache in bytes (underestimation).
|
||||
unsigned long long L1_cache_size;
|
||||
|
||||
/// Size of the L2 cache in bytes (underestimation).
|
||||
/// size of the L2 cache in bytes (underestimation).
|
||||
unsigned long long L2_cache_size;
|
||||
|
||||
/// Size of the Translation Lookaside Buffer (underestimation).
|
||||
/// size of the Translation Lookaside Buffer (underestimation).
|
||||
unsigned int TLB_size;
|
||||
|
||||
/// Overestimation of cache line size. Used to avoid false
|
||||
/// sharing, i. e. elements of different threads are at least this
|
||||
/// sharing, i.e. elements of different threads are at least this
|
||||
/// amount apart.
|
||||
unsigned int cache_line_size;
|
||||
|
||||
// Statistics.
|
||||
|
||||
/// The number of stolen ranges in load-balanced quicksort.
|
||||
sequence_index_t qsb_steals;
|
||||
_SequenceIndex qsb_steals;
|
||||
|
||||
/// Get the global settings.
|
||||
_GLIBCXX_CONST static const _Settings&
|
||||
|
|
|
|||
|
|
@ -55,174 +55,174 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
//prototype
|
||||
template<bool stable, typename RandomAccessIterator,
|
||||
typename Comparator, typename Parallelism>
|
||||
template<bool __stable, typename _RAIter,
|
||||
typename _Compare, typename _Parallelism>
|
||||
void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, Parallelism parallelism);
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _Parallelism __parallelism);
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort, splitting variant at run-time,
|
||||
* for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, multiway_mergesort_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
if(_Settings::get().sort_splitting == EXACT)
|
||||
parallel_sort_mwms<stable, true>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
else
|
||||
parallel_sort_mwms<stable, false>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
parallel_sort_mwms<__stable, false>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with exact splitting,
|
||||
* @brief Choose multiway mergesort with __exact splitting,
|
||||
* for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, multiway_mergesort_exact_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_exact_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
parallel_sort_mwms<stable, true>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with splitting by sampling,
|
||||
* for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, multiway_mergesort_sampling_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_sampling_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
parallel_sort_mwms<stable, false>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
parallel_sort_mwms<__stable, false>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose quicksort for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, quicksort_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(stable == false);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
|
||||
parallel_sort_qs(begin, end, comp, parallelism.get_num_threads());
|
||||
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose balanced quicksort for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param stable Sort stable.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __stable Sort __stable.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, balanced_quicksort_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, balanced_quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(stable == false);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
|
||||
parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads());
|
||||
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with exact splitting,
|
||||
* @brief Choose multiway mergesort with __exact splitting,
|
||||
* for parallel sorting.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, default_parallel_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, default_parallel_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
parallel_sort<stable>
|
||||
(begin, end, comp,
|
||||
multiway_mergesort_exact_tag(parallelism.get_num_threads()));
|
||||
parallel_sort<__stable>
|
||||
(__begin, __end, __comp,
|
||||
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Choose a parallel sorting algorithm.
|
||||
* @param begin Begin iterator of input sequence.
|
||||
* @param end End iterator of input sequence.
|
||||
* @param comp Comparator.
|
||||
* @param stable Sort stable.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __stable Sort __stable.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool stable, typename RandomAccessIterator, typename Comparator>
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, parallel_tag parallelism)
|
||||
parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, parallel_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
if (false) ;
|
||||
#if _GLIBCXX_MERGESORT
|
||||
else if (stable || _Settings::get().sort_algorithm == MWMS)
|
||||
else if (__stable || _Settings::get().sort_algorithm == MWMS)
|
||||
{
|
||||
if(_Settings::get().sort_splitting == EXACT)
|
||||
parallel_sort_mwms<stable, true>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
else
|
||||
parallel_sort_mwms<false, false>
|
||||
(begin, end, comp, parallelism.get_num_threads());
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
#endif
|
||||
#if _GLIBCXX_QUICKSORT
|
||||
else if (_Settings::get().sort_algorithm == QS)
|
||||
parallel_sort_qs(begin, end, comp, parallelism.get_num_threads());
|
||||
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
#endif
|
||||
#if _GLIBCXX_BAL_QUICKSORT
|
||||
else if (_Settings::get().sort_algorithm == QS_BALANCED)
|
||||
parallel_sort_qsb(begin, end, comp, parallelism.get_num_threads());
|
||||
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
#endif
|
||||
else
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
}
|
||||
} // end namespace __gnu_parallel
|
||||
|
||||
|
|
|
|||
|
|
@ -46,37 +46,37 @@ namespace __gnu_parallel
|
|||
struct parallel_tag
|
||||
{
|
||||
private:
|
||||
thread_index_t num_threads;
|
||||
_ThreadIndex __num_threads;
|
||||
|
||||
public:
|
||||
/** @brief Default constructor. Use default number of threads. */
|
||||
parallel_tag()
|
||||
{
|
||||
this->num_threads = 0;
|
||||
this->__num_threads = 0;
|
||||
}
|
||||
|
||||
/** @brief Constructor. Recommend number of threads to use.
|
||||
* @param num_threads Desired number of threads. */
|
||||
parallel_tag(thread_index_t num_threads)
|
||||
* @param __num_threads Desired number of threads. */
|
||||
parallel_tag(_ThreadIndex __num_threads)
|
||||
{
|
||||
this->num_threads = num_threads;
|
||||
this->__num_threads = __num_threads;
|
||||
}
|
||||
|
||||
/** @brief Find out desired number of threads.
|
||||
/** @brief Find out desired number of threads.
|
||||
* @return Desired number of threads. */
|
||||
inline thread_index_t get_num_threads()
|
||||
inline _ThreadIndex __get_num_threads()
|
||||
{
|
||||
if(num_threads == 0)
|
||||
if(__num_threads == 0)
|
||||
return omp_get_max_threads();
|
||||
else
|
||||
return num_threads;
|
||||
return __num_threads;
|
||||
}
|
||||
|
||||
/** @brief Set the desired number of threads.
|
||||
* @param num_threads Desired number of threads. */
|
||||
inline void set_num_threads(thread_index_t num_threads)
|
||||
* @param __num_threads Desired number of threads. */
|
||||
inline void set_num_threads(_ThreadIndex __num_threads)
|
||||
{
|
||||
this->num_threads = num_threads;
|
||||
this->__num_threads = __num_threads;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -85,8 +85,8 @@ namespace __gnu_parallel
|
|||
struct default_parallel_tag : public parallel_tag
|
||||
{
|
||||
default_parallel_tag() { }
|
||||
default_parallel_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
default_parallel_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Recommends parallel execution using dynamic
|
||||
|
|
@ -111,21 +111,21 @@ namespace __gnu_parallel
|
|||
|
||||
|
||||
/** @brief Forces parallel merging
|
||||
* with exact splitting, at compile time. */
|
||||
* with exact splitting, at compile time. */
|
||||
struct exact_tag : public parallel_tag
|
||||
{
|
||||
exact_tag() { }
|
||||
exact_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel merging
|
||||
* with exact splitting, at compile time. */
|
||||
* with exact splitting, at compile time. */
|
||||
struct sampling_tag : public parallel_tag
|
||||
{
|
||||
sampling_tag() { }
|
||||
sampling_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -134,17 +134,17 @@ namespace __gnu_parallel
|
|||
struct multiway_mergesort_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_tag() { }
|
||||
multiway_mergesort_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
multiway_mergesort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using multiway mergesort
|
||||
* with exact splitting at compile time. */
|
||||
* with exact splitting at compile time. */
|
||||
struct multiway_mergesort_exact_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_exact_tag() { }
|
||||
multiway_mergesort_exact_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using multiway mergesort
|
||||
|
|
@ -152,8 +152,8 @@ namespace __gnu_parallel
|
|||
struct multiway_mergesort_sampling_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_sampling_tag() { }
|
||||
multiway_mergesort_sampling_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using unbalanced quicksort
|
||||
|
|
@ -161,8 +161,8 @@ namespace __gnu_parallel
|
|||
struct quicksort_tag : public parallel_tag
|
||||
{
|
||||
quicksort_tag() { }
|
||||
quicksort_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using balanced quicksort
|
||||
|
|
@ -170,8 +170,8 @@ namespace __gnu_parallel
|
|||
struct balanced_quicksort_tag : public parallel_tag
|
||||
{
|
||||
balanced_quicksort_tag() { }
|
||||
balanced_quicksort_tag(thread_index_t num_threads)
|
||||
: parallel_tag(num_threads) { }
|
||||
balanced_quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ namespace __gnu_parallel
|
|||
};
|
||||
|
||||
/// Merging algorithms:
|
||||
// bubblesort-alike, loser-tree variants, enum sentinel.
|
||||
// bubblesort-alike, loser-tree variants, enum sentinel.
|
||||
enum _MultiwayMergeAlgorithm
|
||||
{
|
||||
LOSER_TREE
|
||||
|
|
@ -92,7 +92,7 @@ namespace __gnu_parallel
|
|||
LINEAR
|
||||
};
|
||||
|
||||
/// Sorting/merging algorithms: sampling, exact.
|
||||
/// Sorting/merging algorithms: sampling, exact.
|
||||
enum _SplittingAlgorithm
|
||||
{
|
||||
SAMPLING,
|
||||
|
|
@ -108,7 +108,7 @@ namespace __gnu_parallel
|
|||
EQUAL_SPLIT
|
||||
};
|
||||
|
||||
/// Integer Types.
|
||||
/// Integer Types.
|
||||
// XXX need to use <cstdint>
|
||||
/** @brief 16-bit signed integer. */
|
||||
typedef short int16;
|
||||
|
|
@ -129,27 +129,27 @@ namespace __gnu_parallel
|
|||
typedef unsigned long long uint64;
|
||||
|
||||
/**
|
||||
* @brief Unsigned integer to index elements.
|
||||
* @brief Unsigned integer to index elements.
|
||||
* The total number of elements for each algorithm must fit into this type.
|
||||
*/
|
||||
typedef uint64 sequence_index_t;
|
||||
typedef uint64 _SequenceIndex;
|
||||
|
||||
/**
|
||||
* @brief Unsigned integer to index a thread number.
|
||||
* The maximum thread number (for each processor) must fit into this type.
|
||||
*/
|
||||
typedef uint16 thread_index_t;
|
||||
typedef uint16 _ThreadIndex;
|
||||
|
||||
// XXX atomics interface?
|
||||
/// Longest compare-and-swappable integer type on this platform.
|
||||
typedef int64 lcas_t;
|
||||
typedef int64 _CASable;
|
||||
|
||||
// XXX numeric_limits::digits?
|
||||
/// Number of bits of ::lcas_t.
|
||||
static const int lcas_t_bits = sizeof(lcas_t) * 8;
|
||||
/// Number of bits of ::_CASable.
|
||||
static const int _CASable_bits = sizeof(_CASable) * 8;
|
||||
|
||||
/// ::lcas_t with the right half of bits set to 1.
|
||||
static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1);
|
||||
/// ::_CASable with the right half of bits set to 1.
|
||||
static const _CASable _CASable_mask = ((_CASable(1) << (_CASable_bits / 2)) - 1);
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_TYPES_H */
|
||||
|
|
|
|||
|
|
@ -38,153 +38,153 @@
|
|||
namespace __gnu_parallel
|
||||
{
|
||||
|
||||
/** @brief Parallel std::unique_copy(), w/o explicit equality predicate.
|
||||
* @param first Begin iterator of input sequence.
|
||||
* @param last End iterator of input sequence.
|
||||
* @param result Begin iterator of result sequence.
|
||||
* @param binary_pred Equality predicate.
|
||||
* @return End iterator of result sequence. */
|
||||
template<typename InputIterator,
|
||||
class OutputIterator,
|
||||
class BinaryPredicate>
|
||||
OutputIterator
|
||||
parallel_unique_copy(InputIterator first, InputIterator last,
|
||||
OutputIterator result, BinaryPredicate binary_pred)
|
||||
/** @brief Parallel std::unique_copy(), with explicit equality predicate.
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result sequence.
|
||||
* @param __binary_pred Equality predicate.
|
||||
* @return End iterator of result sequence. */
|
||||
template<typename _IIter,
|
||||
class _OutputIterator,
|
||||
class _BinaryPredicate>
|
||||
_OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result, _BinaryPredicate __binary_pred)
|
||||
{
|
||||
_GLIBCXX_CALL(last - first)
|
||||
_GLIBCXX_CALL(__last - __first)
|
||||
|
||||
typedef std::iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
difference_type size = last - first;
|
||||
_DifferenceType size = __last - __first;
|
||||
|
||||
if (size == 0)
|
||||
return result;
|
||||
return __result;
|
||||
|
||||
// Let the first thread process two parts.
|
||||
difference_type *counter;
|
||||
difference_type *borders;
|
||||
_DifferenceType *__counter;
|
||||
_DifferenceType *__borders;
|
||||
|
||||
thread_index_t num_threads = get_max_threads();
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
// First part contains at least one element.
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
borders = new difference_type[num_threads + 2];
|
||||
equally_split(size, num_threads + 1, borders);
|
||||
counter = new difference_type[num_threads + 1];
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(size, __num_threads + 1, __borders);
|
||||
__counter = new _DifferenceType[__num_threads + 1];
|
||||
}
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
difference_type begin, end;
|
||||
_DifferenceType __begin, __end;
|
||||
|
||||
// Check for length without duplicates
|
||||
// Needed for position in output
|
||||
difference_type i = 0;
|
||||
OutputIterator out = result;
|
||||
_DifferenceType __i = 0;
|
||||
_OutputIterator __out = __result;
|
||||
|
||||
if (iam == 0)
|
||||
if (__iam == 0)
|
||||
{
|
||||
begin = borders[0] + 1; // == 1
|
||||
end = borders[iam + 1];
|
||||
__begin = __borders[0] + 1; // == 1
|
||||
__end = __borders[__iam + 1];
|
||||
|
||||
++i;
|
||||
*out++ = *first;
|
||||
++__i;
|
||||
*__out++ = *__first;
|
||||
|
||||
for (InputIterator iter = first + begin; iter < first + end; ++iter)
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!binary_pred(*iter, *(iter-1)))
|
||||
if (!__binary_pred(*iter, *(iter-1)))
|
||||
{
|
||||
++i;
|
||||
*out++ = *iter;
|
||||
++__i;
|
||||
*__out++ = *iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
begin = borders[iam]; //one part
|
||||
end = borders[iam + 1];
|
||||
__begin = __borders[__iam]; //one part
|
||||
__end = __borders[__iam + 1];
|
||||
|
||||
for (InputIterator iter = first + begin; iter < first + end; ++iter)
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!binary_pred(*iter, *(iter - 1)))
|
||||
++i;
|
||||
if (!__binary_pred(*iter, *(iter - 1)))
|
||||
++__i;
|
||||
}
|
||||
}
|
||||
counter[iam] = i;
|
||||
__counter[__iam] = __i;
|
||||
|
||||
// Last part still untouched.
|
||||
difference_type begin_output;
|
||||
_DifferenceType __begin_output;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Store result in output on calculated positions.
|
||||
begin_output = 0;
|
||||
__begin_output = 0;
|
||||
|
||||
if (iam == 0)
|
||||
if (__iam == 0)
|
||||
{
|
||||
for (int t = 0; t < num_threads; ++t)
|
||||
begin_output += counter[t];
|
||||
for (int __t = 0; __t < __num_threads; ++__t)
|
||||
__begin_output += __counter[__t];
|
||||
|
||||
i = 0;
|
||||
__i = 0;
|
||||
|
||||
OutputIterator iter_out = result + begin_output;
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
|
||||
begin = borders[num_threads];
|
||||
end = size;
|
||||
__begin = __borders[__num_threads];
|
||||
__end = size;
|
||||
|
||||
for (InputIterator iter = first + begin; iter < first + end; ++iter)
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (iter == first || !binary_pred(*iter, *(iter - 1)))
|
||||
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
|
||||
{
|
||||
++i;
|
||||
*iter_out++ = *iter;
|
||||
++__i;
|
||||
*__iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
|
||||
counter[num_threads] = i;
|
||||
__counter[__num_threads] = __i;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int t = 0; t < iam; t++)
|
||||
begin_output += counter[t];
|
||||
for (int __t = 0; __t < __iam; __t++)
|
||||
__begin_output += __counter[__t];
|
||||
|
||||
OutputIterator iter_out = result + begin_output;
|
||||
for (InputIterator iter = first + begin; iter < first + end; ++iter)
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!binary_pred(*iter, *(iter-1)))
|
||||
*iter_out++ = *iter;
|
||||
if (!__binary_pred(*iter, *(iter-1)))
|
||||
*__iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
difference_type end_output = 0;
|
||||
for (int t = 0; t < num_threads + 1; t++)
|
||||
end_output += counter[t];
|
||||
_DifferenceType __end_output = 0;
|
||||
for (int __t = 0; __t < __num_threads + 1; __t++)
|
||||
__end_output += __counter[__t];
|
||||
|
||||
delete[] borders;
|
||||
delete[] __borders;
|
||||
|
||||
return result + end_output;
|
||||
return __result + __end_output;
|
||||
}
|
||||
|
||||
/** @brief Parallel std::unique_copy(), without explicit equality predicate
|
||||
* @param first Begin iterator of input sequence.
|
||||
* @param last End iterator of input sequence.
|
||||
* @param result Begin iterator of result sequence.
|
||||
* @return End iterator of result sequence. */
|
||||
template<typename InputIterator, class OutputIterator>
|
||||
inline OutputIterator
|
||||
parallel_unique_copy(InputIterator first, InputIterator last,
|
||||
OutputIterator result)
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result sequence.
|
||||
* @return End iterator of result sequence. */
|
||||
template<typename _IIter, class _OutputIterator>
|
||||
inline _OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result)
|
||||
{
|
||||
typedef typename std::iterator_traits<InputIterator>::value_type
|
||||
value_type;
|
||||
return parallel_unique_copy(first, last, result,
|
||||
std::equal_to<value_type>());
|
||||
typedef typename std::iterator_traits<_IIter>::value_type
|
||||
_ValueType;
|
||||
return __parallel_unique_copy(__first, __last, __result,
|
||||
std::equal_to<_ValueType>());
|
||||
}
|
||||
|
||||
}//namespace __gnu_parallel
|
||||
|
|
|
|||
|
|
@ -49,257 +49,257 @@ namespace __gnu_parallel
|
|||
|
||||
#define _GLIBCXX_JOB_VOLATILE volatile
|
||||
|
||||
/** @brief One job for a certain thread. */
|
||||
/** @brief One job for a certain thread. */
|
||||
template<typename _DifferenceTp>
|
||||
struct Job
|
||||
struct _Job
|
||||
{
|
||||
typedef _DifferenceTp difference_type;
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
/** @brief First element.
|
||||
*
|
||||
* Changed by owning and stealing thread. By stealing thread,
|
||||
* always incremented. */
|
||||
_GLIBCXX_JOB_VOLATILE difference_type first;
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType __first;
|
||||
|
||||
/** @brief Last element.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE difference_type last;
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType __last;
|
||||
|
||||
/** @brief Number of elements, i. e. @c last-first+1.
|
||||
/** @brief Number of elements, i.e. @c __last-__first+1.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE difference_type load;
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType __load;
|
||||
};
|
||||
|
||||
/** @brief Work stealing algorithm for random access iterators.
|
||||
*
|
||||
* Uses O(1) additional memory. Synchronization at job lists is
|
||||
* Uses O(1) additional memory. Synchronization at job lists is
|
||||
* done with atomic operations.
|
||||
* @param begin Begin iterator of element sequence.
|
||||
* @param end End iterator of element sequence.
|
||||
* @param op User-supplied functor (comparator, predicate, adding
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __op User-supplied functor (comparator, predicate, adding
|
||||
* functor, ...).
|
||||
* @param f Functor to "process" an element with op (depends on
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param base Base value for reduction.
|
||||
* @param output Pointer to position where final result is written to
|
||||
* @param bound Maximum number of elements processed (e. g. for
|
||||
* @param __r Functor to "add" a single result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename RandomAccessIterator,
|
||||
typename Op,
|
||||
typename Fu,
|
||||
typename Red,
|
||||
typename Result>
|
||||
Op
|
||||
for_each_template_random_access_workstealing(RandomAccessIterator begin,
|
||||
RandomAccessIterator end,
|
||||
Op op, Fu& f, Red r,
|
||||
Result base, Result& output,
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
for_each_template_random_access_workstealing(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Op __op, _Fu& __f, _Red __r,
|
||||
_Result __base, _Result& __output,
|
||||
typename std::iterator_traits
|
||||
<RandomAccessIterator>::
|
||||
difference_type bound)
|
||||
<_RAIter>::
|
||||
difference_type __bound)
|
||||
{
|
||||
_GLIBCXX_CALL(end - begin)
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);
|
||||
_DifferenceType __chunk_size = static_cast<_DifferenceType>(__s.workstealing_chunk_size);
|
||||
|
||||
// How many jobs?
|
||||
difference_type length = (bound < 0) ? (end - begin) : bound;
|
||||
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
|
||||
|
||||
// To avoid false sharing in a cache line.
|
||||
const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
|
||||
const int __stride = __s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
|
||||
|
||||
// Total number of threads currently working.
|
||||
thread_index_t busy = 0;
|
||||
_ThreadIndex __busy = 0;
|
||||
|
||||
Job<difference_type> *job;
|
||||
_Job<_DifferenceType> *__job;
|
||||
|
||||
omp_lock_t output_lock;
|
||||
omp_init_lock(&output_lock);
|
||||
omp_lock_t __output_lock;
|
||||
omp_init_lock(&__output_lock);
|
||||
|
||||
// Write base value to output.
|
||||
output = base;
|
||||
// Write __base value to __output.
|
||||
__output = __base;
|
||||
|
||||
// No more threads than jobs, at least one thread.
|
||||
thread_index_t num_threads =
|
||||
__gnu_parallel::max<thread_index_t>(1,
|
||||
__gnu_parallel::min<difference_type>(length, get_max_threads()));
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::max<_ThreadIndex>(1,
|
||||
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
|
||||
|
||||
# pragma omp parallel shared(busy) num_threads(num_threads)
|
||||
# pragma omp parallel shared(__busy) num_threads(__num_threads)
|
||||
{
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
// Create job description array.
|
||||
job = new Job<difference_type>[num_threads * stride];
|
||||
// Create job description array.
|
||||
__job = new _Job<_DifferenceType>[__num_threads * __stride];
|
||||
}
|
||||
|
||||
// Initialization phase.
|
||||
|
||||
// Flags for every thread if it is doing productive work.
|
||||
bool iam_working = false;
|
||||
bool __iam_working = false;
|
||||
|
||||
// Thread id.
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// This job.
|
||||
Job<difference_type>& my_job = job[iam * stride];
|
||||
// This thread's job.
|
||||
_Job<_DifferenceType>& __my_job = __job[__iam * __stride];
|
||||
|
||||
// Random number (for work stealing).
|
||||
thread_index_t victim;
|
||||
_ThreadIndex __victim;
|
||||
|
||||
// Local value for reduction.
|
||||
Result result = Result();
|
||||
_Result __result = _Result();
|
||||
|
||||
// Number of elements to steal in one attempt.
|
||||
difference_type steal;
|
||||
_DifferenceType __steal;
|
||||
|
||||
// Every thread has its own random number generator
|
||||
// (modulo num_threads).
|
||||
random_number rand_gen(iam, num_threads);
|
||||
// (modulo __num_threads).
|
||||
_RandomNumber rand_gen(__iam, __num_threads);
|
||||
|
||||
// This thread is currently working.
|
||||
# pragma omp atomic
|
||||
++busy;
|
||||
++__busy;
|
||||
|
||||
iam_working = true;
|
||||
__iam_working = true;
|
||||
|
||||
// How many jobs per thread? last thread gets the rest.
|
||||
my_job.first =
|
||||
static_cast<difference_type>(iam * (length / num_threads));
|
||||
__my_job.__first =
|
||||
static_cast<_DifferenceType>(__iam * (__length / __num_threads));
|
||||
|
||||
my_job.last = (iam == (num_threads - 1)) ?
|
||||
(length - 1) : ((iam + 1) * (length / num_threads) - 1);
|
||||
my_job.load = my_job.last - my_job.first + 1;
|
||||
__my_job.__last = (__iam == (__num_threads - 1)) ?
|
||||
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
|
||||
__my_job.__load = __my_job.__last - __my_job.__first + 1;
|
||||
|
||||
// Init result with first value (to have a base value for reduction).
|
||||
if (my_job.first <= my_job.last)
|
||||
// Init __result with first value (to have a base value for reduction).
|
||||
if (__my_job.__first <= __my_job.__last)
|
||||
{
|
||||
// Cannot use volatile variable directly.
|
||||
difference_type my_first = my_job.first;
|
||||
result = f(op, begin + my_first);
|
||||
++my_job.first;
|
||||
--my_job.load;
|
||||
_DifferenceType __my_first = __my_job.__first;
|
||||
__result = __f(__op, __begin + __my_first);
|
||||
++__my_job.__first;
|
||||
--__my_job.__load;
|
||||
}
|
||||
|
||||
RandomAccessIterator current;
|
||||
_RAIter __current;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Actual work phase
|
||||
// Work on own or stolen start
|
||||
while (busy > 0)
|
||||
// Work on own or stolen start
|
||||
while (__busy > 0)
|
||||
{
|
||||
// Work until no productive thread left.
|
||||
# pragma omp flush(busy)
|
||||
// Work until no productive thread left.
|
||||
# pragma omp flush(__busy)
|
||||
|
||||
// Thread has own work to do
|
||||
while (my_job.first <= my_job.last)
|
||||
while (__my_job.__first <= __my_job.__last)
|
||||
{
|
||||
// fetch-and-add call
|
||||
// Reserve current job block (size chunk_size) in my queue.
|
||||
difference_type current_job =
|
||||
fetch_and_add<difference_type>(&(my_job.first), chunk_size);
|
||||
// Reserve current job block (size __chunk_size) in my queue.
|
||||
_DifferenceType current_job =
|
||||
__fetch_and_add<_DifferenceType>(&(__my_job.__first), __chunk_size);
|
||||
|
||||
// Update load, to make the three values consistent,
|
||||
// first might have been changed in the meantime
|
||||
my_job.load = my_job.last - my_job.first + 1;
|
||||
for (difference_type job_counter = 0;
|
||||
job_counter < chunk_size && current_job <= my_job.last;
|
||||
// Update __load, to make the three values consistent,
|
||||
// __first might have been changed in the meantime
|
||||
__my_job.__load = __my_job.__last - __my_job.__first + 1;
|
||||
for (_DifferenceType job_counter = 0;
|
||||
job_counter < __chunk_size && current_job <= __my_job.__last;
|
||||
++job_counter)
|
||||
{
|
||||
// Yes: process it!
|
||||
current = begin + current_job;
|
||||
__current = __begin + current_job;
|
||||
++current_job;
|
||||
|
||||
// Do actual work.
|
||||
result = r(result, f(op, current));
|
||||
__result = __r(__result, __f(__op, __current));
|
||||
}
|
||||
|
||||
# pragma omp flush(busy)
|
||||
# pragma omp flush(__busy)
|
||||
}
|
||||
|
||||
// After reaching this point, a thread's job list is empty.
|
||||
if (iam_working)
|
||||
// After reaching this point, a thread's job list is empty.
|
||||
if (__iam_working)
|
||||
{
|
||||
// This thread no longer has work.
|
||||
# pragma omp atomic
|
||||
--busy;
|
||||
--__busy;
|
||||
|
||||
iam_working = false;
|
||||
__iam_working = false;
|
||||
}
|
||||
|
||||
difference_type supposed_first, supposed_last, supposed_load;
|
||||
_DifferenceType __supposed_first, __supposed_last, __supposed_load;
|
||||
do
|
||||
{
|
||||
// Find random nonempty deque (not own), do consistency check.
|
||||
yield();
|
||||
# pragma omp flush(busy)
|
||||
victim = rand_gen();
|
||||
supposed_first = job[victim * stride].first;
|
||||
supposed_last = job[victim * stride].last;
|
||||
supposed_load = job[victim * stride].load;
|
||||
__yield();
|
||||
# pragma omp flush(__busy)
|
||||
__victim = rand_gen();
|
||||
__supposed_first = __job[__victim * __stride].__first;
|
||||
__supposed_last = __job[__victim * __stride].__last;
|
||||
__supposed_load = __job[__victim * __stride].__load;
|
||||
}
|
||||
while (busy > 0
|
||||
&& ((supposed_load <= 0)
|
||||
|| ((supposed_first + supposed_load - 1) != supposed_last)));
|
||||
while (__busy > 0
|
||||
&& ((__supposed_load <= 0)
|
||||
|| ((__supposed_first + __supposed_load - 1) != __supposed_last)));
|
||||
|
||||
if (busy == 0)
|
||||
if (__busy == 0)
|
||||
break;
|
||||
|
||||
if (supposed_load > 0)
|
||||
if (__supposed_load > 0)
|
||||
{
|
||||
// Has work and work to do.
|
||||
// Number of elements to steal (at least one).
|
||||
steal = (supposed_load < 2) ? 1 : supposed_load / 2;
|
||||
__steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
|
||||
|
||||
// Push victim's start forward.
|
||||
difference_type stolen_first =
|
||||
fetch_and_add<difference_type>(
|
||||
&(job[victim * stride].first), steal);
|
||||
difference_type stolen_try =
|
||||
stolen_first + steal - difference_type(1);
|
||||
// Push victim's start forward.
|
||||
_DifferenceType __stolen_first =
|
||||
__fetch_and_add<_DifferenceType>(
|
||||
&(__job[__victim * __stride].__first), __steal);
|
||||
_DifferenceType stolen_try =
|
||||
__stolen_first + __steal - _DifferenceType(1);
|
||||
|
||||
my_job.first = stolen_first;
|
||||
my_job.last = __gnu_parallel::min(stolen_try, supposed_last);
|
||||
my_job.load = my_job.last - my_job.first + 1;
|
||||
__my_job.__first = __stolen_first;
|
||||
__my_job.__last = __gnu_parallel::min(stolen_try, __supposed_last);
|
||||
__my_job.__load = __my_job.__last - __my_job.__first + 1;
|
||||
|
||||
// Has potential work again.
|
||||
# pragma omp atomic
|
||||
++busy;
|
||||
iam_working = true;
|
||||
++__busy;
|
||||
__iam_working = true;
|
||||
|
||||
# pragma omp flush(busy)
|
||||
# pragma omp flush(__busy)
|
||||
}
|
||||
# pragma omp flush(busy)
|
||||
} // end while busy > 0
|
||||
// Add accumulated result to output.
|
||||
omp_set_lock(&output_lock);
|
||||
output = r(output, result);
|
||||
omp_unset_lock(&output_lock);
|
||||
# pragma omp flush(__busy)
|
||||
} // end while __busy > 0
|
||||
// Add accumulated __result to output.
|
||||
omp_set_lock(&__output_lock);
|
||||
__output = __r(__output, __result);
|
||||
omp_unset_lock(&__output_lock);
|
||||
}
|
||||
|
||||
delete[] job;
|
||||
delete[] __job;
|
||||
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform)
|
||||
f.finish_iterator = begin + length;
|
||||
__f.finish_iterator = __begin + __length;
|
||||
|
||||
omp_destroy_lock(&output_lock);
|
||||
omp_destroy_lock(&__output_lock);
|
||||
|
||||
return op;
|
||||
return __op;
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue