mirror of git://gcc.gnu.org/git/gcc.git
re PR libstdc++/34797 ([parallel mode] Settings are separated for each compilation unit)
2008-02-17 Benjamin Kosnik <bkoz@redhat.com> PR libstdc++/34797 * include/parallel/settings.h (_Settings): Reconstruct Settings class here, uglify, remove anonymous namespace and static members. Convert to datum. * include/parallel/types.h: Move Settings:: enumerations here, uglify. * src/parallel_settings.cc: New, definition for _Settings member functions. * include/parallel/multiway_merge.h: Same. * include/parallel/for_each.h: Same. * include/parallel/workstealing.h: Same. * include/parallel/base.h: Same. * include/parallel/numeric: Same. * include/parallel/features.h: Same. * include/parallel/quicksort.h: Same. * include/parallel/equally_split.h: Same. * include/parallel/algorithmfwd.h: Same. * include/parallel/omp_loop_static.h: Same. * include/parallel/random_shuffle.h: Same. * include/parallel/balanced_quicksort.h: Same. * include/parallel/tags.h: Same. * include/parallel/multiway_mergesort.h: Same. * include/parallel/numericfwd.h: Same. * include/parallel/partition.h: Same. * include/parallel/partial_sum.h: Same. * include/parallel/find.h: Same. * include/parallel/algo.h: Same. * include/parallel/omp_loop.h: Same. * include/parallel/sort.h: Same. * src/Makefile.am (parallel_sources): Add parallel_settings.cc. * src/Makefile.in: Regenerate. * config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set. From-SVN: r132383
This commit is contained in:
parent
e69044cb35
commit
ee1b5fc5e0
|
|
@ -1,3 +1,39 @@
|
|||
2008-02-17 Benjamin Kosnik <bkoz@redhat.com>
|
||||
|
||||
PR libstdc++/34797
|
||||
* include/parallel/settings.h (_Settings): Reconstruct Settings class
|
||||
here, uglify, remove anonymous namespace and static
|
||||
members. Convert to datum.
|
||||
* include/parallel/types.h: Move Settings:: enumerations here, uglify.
|
||||
* src/parallel_settings.cc: New, definition for _Settings member
|
||||
functions.
|
||||
* include/parallel/multiway_merge.h: Same.
|
||||
* include/parallel/for_each.h: Same.
|
||||
* include/parallel/workstealing.h: Same.
|
||||
* include/parallel/base.h: Same.
|
||||
* include/parallel/numeric: Same.
|
||||
* include/parallel/features.h: Same.
|
||||
* include/parallel/quicksort.h: Same.
|
||||
* include/parallel/equally_split.h: Same.
|
||||
* include/parallel/algorithmfwd.h: Same.
|
||||
* include/parallel/omp_loop_static.h: Same.
|
||||
* include/parallel/random_shuffle.h: Same.
|
||||
* include/parallel/balanced_quicksort.h: Same.
|
||||
* include/parallel/tags.h: Same.
|
||||
* include/parallel/multiway_mergesort.h: Same.
|
||||
* include/parallel/numericfwd.h: Same.
|
||||
* include/parallel/partition.h: Same.
|
||||
* include/parallel/partial_sum.h: Same.
|
||||
* include/parallel/find.h: Same.
|
||||
* include/parallel/algo.h: Same.
|
||||
* include/parallel/omp_loop.h: Same.
|
||||
* include/parallel/sort.h: Same.
|
||||
|
||||
* src/Makefile.am (parallel_sources): Add parallel_settings.cc.
|
||||
* src/Makefile.in: Regenerate.
|
||||
|
||||
* config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set.
|
||||
|
||||
2008-02-17 Paolo Carlini <pcarlini@suse.de>
|
||||
|
||||
PR libstdc++/35221
|
||||
|
|
|
|||
|
|
@ -779,7 +779,15 @@ GLIBCXX_3.4.10 {
|
|||
_ZNKSt4hashISt10error_codeEclES0_;
|
||||
_ZNKSt4hashI[eg]EclE[eg];
|
||||
|
||||
_ZSt17__verify_grouping*;
|
||||
|
||||
_ZNSt8__detail12__prime_listE;
|
||||
_ZNSt3tr18__detail12__prime_listE;
|
||||
|
||||
# for parallel mode
|
||||
_ZN14__gnu_parallel9_Settings3getEv;
|
||||
_ZN14__gnu_parallel9_Settings3setERS0_;
|
||||
|
||||
_ZNSt9__cxx199815_List_node_base4hook*;
|
||||
_ZNSt9__cxx199815_List_node_base4swap*;
|
||||
_ZNSt9__cxx199815_List_node_base6unhookEv;
|
||||
|
|
@ -788,10 +796,6 @@ GLIBCXX_3.4.10 {
|
|||
|
||||
_ZNSt15basic_streambufI[cw]St11char_traitsI[cw]EE6stosscEv;
|
||||
|
||||
_ZSt17__verify_grouping*;
|
||||
|
||||
_ZNSt8__detail12__prime_listE;
|
||||
_ZNSt3tr18__detail12__prime_listE;
|
||||
|
||||
_ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE4syncEv;
|
||||
_ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE[5-9CD]*;
|
||||
|
|
|
|||
|
|
@ -87,17 +87,16 @@ namespace __parallel
|
|||
Function
|
||||
for_each_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Function f, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::for_each_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().for_each_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy;
|
||||
__gnu_parallel::for_each_selector<RandomAccessIterator>
|
||||
functionality;
|
||||
__gnu_parallel::for_each_selector<RandomAccessIterator> functionality;
|
||||
|
||||
return __gnu_parallel::
|
||||
for_each_template_random_access(begin, end, f, functionality,
|
||||
|
|
@ -112,7 +111,7 @@ namespace __parallel
|
|||
template<typename Iterator, typename Function>
|
||||
inline Function
|
||||
for_each(Iterator begin, Iterator end, Function f,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<Iterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
|
|
@ -333,7 +332,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(last - begin)
|
||||
> __gnu_parallel::Settings::unique_copy_minimal_n))
|
||||
> __gnu_parallel::_Settings::get().unique_copy_minimal_n))
|
||||
return __gnu_parallel::parallel_unique_copy(begin, last, out, pred);
|
||||
else
|
||||
return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred);
|
||||
|
|
@ -413,9 +412,9 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1)
|
||||
>= __gnu_parallel::Settings::set_union_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().set_union_minimal_n
|
||||
|| static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2)
|
||||
>= __gnu_parallel::Settings::set_union_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().set_union_minimal_n))
|
||||
return __gnu_parallel::parallel_set_union(begin1, end1,
|
||||
begin2, end2, result, pred);
|
||||
else
|
||||
|
|
@ -519,9 +518,9 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1)
|
||||
>= __gnu_parallel::Settings::set_union_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().set_union_minimal_n
|
||||
|| static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2)
|
||||
>= __gnu_parallel::Settings::set_union_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().set_union_minimal_n))
|
||||
return __gnu_parallel::parallel_set_intersection(begin1, end1, begin2,
|
||||
end2, result, pred);
|
||||
else
|
||||
|
|
@ -632,9 +631,9 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1)
|
||||
>= __gnu_parallel::Settings::set_symmetric_difference_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n
|
||||
|| static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2)
|
||||
>= __gnu_parallel::Settings::set_symmetric_difference_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n))
|
||||
return __gnu_parallel::parallel_set_symmetric_difference(begin1, end1,
|
||||
begin2, end2,
|
||||
result, pred);
|
||||
|
|
@ -741,9 +740,9 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1)
|
||||
>= __gnu_parallel::Settings::set_difference_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().set_difference_minimal_n
|
||||
|| static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2)
|
||||
>= __gnu_parallel::Settings::set_difference_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().set_difference_minimal_n))
|
||||
return __gnu_parallel::parallel_set_difference(begin1, end1,
|
||||
begin2, end2,
|
||||
result, pred);
|
||||
|
|
@ -903,7 +902,7 @@ namespace __parallel
|
|||
typename iterator_traits<RandomAccessIterator>::difference_type
|
||||
count_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
const T& value, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
typedef iterator_traits<RandomAccessIterator> traits_type;
|
||||
|
|
@ -913,7 +912,7 @@ namespace __parallel
|
|||
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::count_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().count_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
__gnu_parallel::count_selector<RandomAccessIterator, difference_type>
|
||||
|
|
@ -941,7 +940,7 @@ namespace __parallel
|
|||
template<typename InputIterator, typename T>
|
||||
inline typename iterator_traits<InputIterator>::difference_type
|
||||
count(InputIterator begin, InputIterator end, const T& value,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::iterator_category iterator_category;
|
||||
|
|
@ -971,7 +970,7 @@ namespace __parallel
|
|||
typename iterator_traits<RandomAccessIterator>::difference_type
|
||||
count_if_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Predicate pred, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
typedef iterator_traits<RandomAccessIterator> traits_type;
|
||||
|
|
@ -981,7 +980,7 @@ namespace __parallel
|
|||
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::count_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().count_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
difference_type res = 0;
|
||||
|
|
@ -1010,7 +1009,7 @@ namespace __parallel
|
|||
template<typename InputIterator, typename Predicate>
|
||||
inline typename iterator_traits<InputIterator>::difference_type
|
||||
count_if(InputIterator begin, InputIterator end, Predicate pred,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::iterator_category iterator_category;
|
||||
|
|
@ -1217,12 +1216,12 @@ namespace __parallel
|
|||
transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end,
|
||||
RandomAccessIterator2 result, UnaryOperation unary_op,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::transform_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().transform_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy = true;
|
||||
|
|
@ -1259,7 +1258,7 @@ namespace __parallel
|
|||
inline OutputIterator
|
||||
transform(InputIterator begin, InputIterator end, OutputIterator result,
|
||||
UnaryOperation unary_op,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> iteratori_traits;
|
||||
typedef std::iterator_traits<OutputIterator> iteratoro_traits;
|
||||
|
|
@ -1306,11 +1305,11 @@ namespace __parallel
|
|||
RandomAccessIterator3 result, BinaryOperation binary_op,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
(end1 - begin1) >= __gnu_parallel::Settings::transform_minimal_n
|
||||
(end1 - begin1) >= __gnu_parallel::_Settings::get().transform_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy = true;
|
||||
|
|
@ -1352,7 +1351,7 @@ namespace __parallel
|
|||
transform(InputIterator1 begin1, InputIterator1 end1,
|
||||
InputIterator2 begin2, OutputIterator result,
|
||||
BinaryOperation binary_op,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator1> iteratori1_traits;
|
||||
typedef typename iteratori1_traits::iterator_category
|
||||
|
|
@ -1410,7 +1409,7 @@ namespace __parallel
|
|||
replace_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
const T& old_value, const T& new_value,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
// XXX parallel version is where?
|
||||
|
|
@ -1422,7 +1421,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator, typename T>
|
||||
inline void
|
||||
replace(ForwardIterator begin, ForwardIterator end, const T& old_value,
|
||||
const T& new_value, __gnu_parallel::parallelism parallelism_tag)
|
||||
const T& new_value, __gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<ForwardIterator> traits_type;
|
||||
typedef typename traits_type::iterator_category iterator_category;
|
||||
|
|
@ -1463,12 +1462,12 @@ namespace __parallel
|
|||
replace_if_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Predicate pred, const T& new_value,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::replace_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().replace_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy;
|
||||
|
|
@ -1491,7 +1490,7 @@ namespace __parallel
|
|||
inline void
|
||||
replace_if(ForwardIterator begin, ForwardIterator end,
|
||||
Predicate pred, const T& new_value,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<ForwardIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
|
|
@ -1528,12 +1527,12 @@ namespace __parallel
|
|||
void
|
||||
generate_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Generator gen, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::generate_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().generate_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy;
|
||||
|
|
@ -1552,7 +1551,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator, typename Generator>
|
||||
inline void
|
||||
generate(ForwardIterator begin, ForwardIterator end,
|
||||
Generator gen, __gnu_parallel::parallelism parallelism_tag)
|
||||
Generator gen, __gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<ForwardIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
|
|
@ -1588,7 +1587,7 @@ namespace __parallel
|
|||
inline RandomAccessIterator
|
||||
generate_n_switch(RandomAccessIterator begin, Size n, Generator gen,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
// XXX parallel version is where?
|
||||
|
|
@ -1599,7 +1598,7 @@ namespace __parallel
|
|||
template<typename OutputIterator, typename Size, typename Generator>
|
||||
inline OutputIterator
|
||||
generate_n(OutputIterator begin, Size n, Generator gen,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<OutputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
|
|
@ -1661,7 +1660,7 @@ namespace __parallel
|
|||
return;
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::random_shuffle_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().random_shuffle_minimal_n))
|
||||
__gnu_parallel::parallel_random_shuffle(begin, end, rand);
|
||||
else
|
||||
__gnu_parallel::sequential_random_shuffle(begin, end, rand);
|
||||
|
|
@ -1689,7 +1688,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::partition_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().partition_minimal_n))
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIterator>::
|
||||
difference_type difference_type;
|
||||
|
|
@ -1748,7 +1747,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::sort_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().sort_minimal_n))
|
||||
__gnu_parallel::parallel_sort(begin, end, comp, false);
|
||||
else
|
||||
sort(begin, end, comp, __gnu_parallel::sequential_tag());
|
||||
|
|
@ -1788,7 +1787,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::sort_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().sort_minimal_n))
|
||||
__gnu_parallel::parallel_sort(begin, end, comp, true);
|
||||
else
|
||||
stable_sort(begin, end, comp, __gnu_parallel::sequential_tag());
|
||||
|
|
@ -1837,9 +1836,9 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1)
|
||||
>= __gnu_parallel::Settings::merge_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().merge_minimal_n
|
||||
|| static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2)
|
||||
>= __gnu_parallel::Settings::merge_minimal_n)))
|
||||
>= __gnu_parallel::_Settings::get().merge_minimal_n)))
|
||||
return __gnu_parallel::parallel_merge_advance(begin1, end1,
|
||||
begin2, end2,
|
||||
result, (end1 - begin1)
|
||||
|
|
@ -1913,7 +1912,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::nth_element_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().nth_element_minimal_n))
|
||||
__gnu_parallel::parallel_nth_element(begin, nth, end, comp);
|
||||
else
|
||||
nth_element(begin, nth, end, comp, __gnu_parallel::sequential_tag());
|
||||
|
|
@ -1953,7 +1952,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::partial_sort_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().partial_sort_minimal_n))
|
||||
__gnu_parallel::parallel_partial_sort(begin, middle, end, comp);
|
||||
else
|
||||
partial_sort(begin, middle, end, comp,
|
||||
|
|
@ -1997,12 +1996,12 @@ namespace __parallel
|
|||
RandomAccessIterator
|
||||
max_element_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::max_element_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().max_element_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
RandomAccessIterator res(begin);
|
||||
|
|
@ -2026,7 +2025,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator>
|
||||
inline ForwardIterator
|
||||
max_element(ForwardIterator begin, ForwardIterator end,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef typename iterator_traits<ForwardIterator>::value_type value_type;
|
||||
return max_element(begin, end, std::less<value_type>(), parallelism_tag);
|
||||
|
|
@ -2044,7 +2043,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator, typename Comparator>
|
||||
inline ForwardIterator
|
||||
max_element(ForwardIterator begin, ForwardIterator end, Comparator comp,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<ForwardIterator> traits_type;
|
||||
typedef typename traits_type::iterator_category iterator_category;
|
||||
|
|
@ -2088,12 +2087,12 @@ namespace __parallel
|
|||
RandomAccessIterator
|
||||
min_element_switch(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::min_element_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().min_element_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
RandomAccessIterator res(begin);
|
||||
|
|
@ -2117,7 +2116,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator>
|
||||
inline ForwardIterator
|
||||
min_element(ForwardIterator begin, ForwardIterator end,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef typename iterator_traits<ForwardIterator>::value_type value_type;
|
||||
return min_element(begin, end, std::less<value_type>(), parallelism_tag);
|
||||
|
|
@ -2135,7 +2134,7 @@ namespace __parallel
|
|||
template<typename ForwardIterator, typename Comparator>
|
||||
inline ForwardIterator
|
||||
min_element(ForwardIterator begin, ForwardIterator end, Comparator comp,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<ForwardIterator> traits_type;
|
||||
typedef typename traits_type::iterator_category iterator_category;
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Tp>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
count(_IIter, _IIter, const _Tp&, __gnu_parallel::parallelism);
|
||||
count(_IIter, _IIter, const _Tp&, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _IterTag>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
|
|
@ -99,7 +99,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Tp>
|
||||
typename iterator_traits<_RAIter>::difference_type
|
||||
count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _IIter, typename _Predicate>
|
||||
|
|
@ -112,7 +112,7 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Predicate>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
count_if(_IIter, _IIter, _Predicate, __gnu_parallel::parallelism);
|
||||
count_if(_IIter, _IIter, _Predicate, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _Predicate, typename _IterTag>
|
||||
typename iterator_traits<_IIter>::difference_type
|
||||
|
|
@ -121,7 +121,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Predicate>
|
||||
typename iterator_traits<_RAIter>::difference_type
|
||||
count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
// algobase.h
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
|
|
@ -219,7 +219,7 @@ namespace __parallel
|
|||
|
||||
template<typename _Iterator, typename _Function>
|
||||
_Function
|
||||
for_each(_Iterator, _Iterator, _Function, __gnu_parallel::parallelism);
|
||||
for_each(_Iterator, _Iterator, _Function, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _Function, typename _IterTag>
|
||||
_Function
|
||||
|
|
@ -228,7 +228,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Function>
|
||||
_Function
|
||||
for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _FIter, typename _Generator>
|
||||
|
|
@ -241,7 +241,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Generator>
|
||||
void
|
||||
generate(_FIter, _FIter, _Generator, __gnu_parallel::parallelism);
|
||||
generate(_FIter, _FIter, _Generator, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Generator, typename _IterTag>
|
||||
void
|
||||
|
|
@ -250,7 +250,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Generator>
|
||||
void
|
||||
generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _OIter, typename _Size, typename _Generator>
|
||||
_OIter
|
||||
|
|
@ -262,7 +262,7 @@ namespace __parallel
|
|||
|
||||
template<typename _OIter, typename _Size, typename _Generator>
|
||||
_OIter
|
||||
generate_n(_OIter, _Size, _Generator, __gnu_parallel::parallelism);
|
||||
generate_n(_OIter, _Size, _Generator, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _OIter, typename _Size, typename _Generator,
|
||||
typename _IterTag>
|
||||
|
|
@ -272,7 +272,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Size, typename _Generator>
|
||||
_RAIter
|
||||
generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter1, typename _IIter2>
|
||||
bool
|
||||
|
|
@ -416,7 +416,7 @@ namespace __parallel
|
|||
template<typename _IIter, typename _OIter, typename UnaryOperation>
|
||||
_OIter
|
||||
transform(_IIter, _IIter, _OIter, UnaryOperation,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename UnaryOperation,
|
||||
typename _IterTag1, typename _IterTag2>
|
||||
|
|
@ -429,7 +429,7 @@ namespace __parallel
|
|||
_RAOIter
|
||||
transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter,
|
||||
|
|
@ -447,7 +447,7 @@ namespace __parallel
|
|||
typename _BiOperation>
|
||||
_OIter
|
||||
transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _RAIter3,
|
||||
typename _BiOperation>
|
||||
|
|
@ -455,7 +455,7 @@ namespace __parallel
|
|||
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
|
||||
random_access_iterator_tag, random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter,
|
||||
typename _BiOperation, typename _Tag1,
|
||||
|
|
@ -477,7 +477,7 @@ namespace __parallel
|
|||
template<typename _FIter, typename _Tp>
|
||||
void
|
||||
replace(_FIter, _FIter, const _Tp&, const _Tp&,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Tp, typename _IterTag>
|
||||
void
|
||||
|
|
@ -486,7 +486,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Tp>
|
||||
void
|
||||
replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
|
||||
random_access_iterator_tag, __gnu_parallel::parallelism);
|
||||
random_access_iterator_tag, __gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _FIter, typename _Predicate, typename _Tp>
|
||||
|
|
@ -501,7 +501,7 @@ namespace __parallel
|
|||
template<typename _FIter, typename _Predicate, typename _Tp>
|
||||
void
|
||||
replace_if(_FIter, _FIter, _Predicate, const _Tp&,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Predicate, typename _Tp,
|
||||
typename _IterTag>
|
||||
|
|
@ -512,7 +512,7 @@ namespace __parallel
|
|||
void
|
||||
replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _FIter>
|
||||
|
|
@ -525,7 +525,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter>
|
||||
_FIter
|
||||
max_element(_FIter, _FIter, __gnu_parallel::parallelism);
|
||||
max_element(_FIter, _FIter, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Compare>
|
||||
_FIter
|
||||
|
|
@ -537,7 +537,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Compare>
|
||||
_FIter
|
||||
max_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism);
|
||||
max_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Compare, typename _IterTag>
|
||||
_FIter
|
||||
|
|
@ -546,7 +546,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Compare>
|
||||
_RAIter
|
||||
max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _OIter>
|
||||
|
|
@ -594,7 +594,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter>
|
||||
_FIter
|
||||
min_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag);
|
||||
min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag);
|
||||
|
||||
template<typename _FIter, typename _Compare>
|
||||
_FIter
|
||||
|
|
@ -606,7 +606,7 @@ namespace __parallel
|
|||
|
||||
template<typename _FIter, typename _Compare>
|
||||
_FIter
|
||||
min_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism);
|
||||
min_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _FIter, typename _Compare, typename _IterTag>
|
||||
_FIter
|
||||
|
|
@ -615,7 +615,7 @@ namespace __parallel
|
|||
template<typename _RAIter, typename _Compare>
|
||||
_RAIter
|
||||
min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _RAIter>
|
||||
void
|
||||
|
|
|
|||
|
|
@ -252,7 +252,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
|
||||
QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
|
||||
|
||||
difference_type base_case_n = Settings::sort_qsb_base_case_maximal_n;
|
||||
difference_type base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
|
||||
if (base_case_n < 2)
|
||||
base_case_n = 2;
|
||||
thread_index_t num_threads = tl.num_threads;
|
||||
|
|
|
|||
|
|
@ -38,11 +38,12 @@
|
|||
#ifndef _GLIBCXX_PARALLEL_BASE_H
|
||||
#define _GLIBCXX_PARALLEL_BASE_H 1
|
||||
|
||||
#include <parallel/features.h>
|
||||
#include <cstdio>
|
||||
#include <functional>
|
||||
#include <omp.h>
|
||||
#include <parallel/features.h>
|
||||
#include <parallel/basic_iterator.h>
|
||||
#include <parallel/parallel.h>
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
// Parallel mode namespaces.
|
||||
|
|
@ -67,6 +68,7 @@ namespace __gnu_parallel
|
|||
*/
|
||||
namespace __gnu_sequential
|
||||
{
|
||||
// Import whatever is the serial version.
|
||||
#ifdef _GLIBCXX_PARALLEL
|
||||
using namespace std::__norm;
|
||||
#else
|
||||
|
|
@ -77,6 +79,22 @@ namespace __gnu_sequential
|
|||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
// NB: Including this file cannot produce (unresolved) symbols from
|
||||
// the OpenMP runtime unless the parallel mode is actually invoked
|
||||
// and active, which implies that the OpenMP runtime is actually
|
||||
// going to be linked in.
|
||||
inline int
|
||||
get_max_threads()
|
||||
{
|
||||
int __i = omp_get_max_threads();
|
||||
return __i > 1 ? __i : 1;
|
||||
}
|
||||
|
||||
|
||||
inline bool
|
||||
is_parallel(const _Parallelism __p) { return __p != sequential; }
|
||||
|
||||
|
||||
// XXX remove std::duplicates from here if possible,
|
||||
// XXX but keep minimal dependencies.
|
||||
|
||||
|
|
@ -175,11 +193,8 @@ template<typename _Predicate, typename argument_type>
|
|||
|
||||
/** @brief Similar to std::binder1st,
|
||||
* but giving the argument types explicitly. */
|
||||
template<
|
||||
typename _Operation,
|
||||
typename first_argument_type,
|
||||
typename second_argument_type,
|
||||
typename result_type>
|
||||
template<typename _Operation, typename first_argument_type,
|
||||
typename second_argument_type, typename result_type>
|
||||
class binder1st
|
||||
: public std::unary_function<second_argument_type, result_type>
|
||||
{
|
||||
|
|
@ -207,11 +222,8 @@ template<
|
|||
* @brief Similar to std::binder2nd, but giving the argument types
|
||||
* explicitly.
|
||||
*/
|
||||
template<
|
||||
typename _Operation,
|
||||
typename first_argument_type,
|
||||
typename second_argument_type,
|
||||
typename result_type>
|
||||
template<typename _Operation, typename first_argument_type,
|
||||
typename second_argument_type, typename result_type>
|
||||
class binder2nd
|
||||
: public std::unary_function<first_argument_type, result_type>
|
||||
{
|
||||
|
|
|
|||
|
|
@ -51,13 +51,11 @@ namespace __gnu_parallel
|
|||
* @returns End of splitter sequence, i. e. @c s+num_threads+1 */
|
||||
template<typename difference_type, typename OutputIterator>
|
||||
OutputIterator
|
||||
equally_split(difference_type n,
|
||||
thread_index_t num_threads,
|
||||
OutputIterator s)
|
||||
equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
|
||||
{
|
||||
difference_type chunk_length = n / num_threads,
|
||||
num_longer_chunks = n % num_threads,
|
||||
pos = 0;
|
||||
difference_type chunk_length = n / num_threads;
|
||||
difference_type num_longer_chunks = n % num_threads;
|
||||
difference_type pos = 0;
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
{
|
||||
*s++ = pos;
|
||||
|
|
@ -75,17 +73,16 @@ template<typename difference_type, typename OutputIterator>
|
|||
* thread number thread_no+1 (excluded).
|
||||
* @param n Number of elements
|
||||
* @param num_threads Number of parts
|
||||
* @returns Splitting point */
|
||||
* @returns Splitting point */
|
||||
template<typename difference_type>
|
||||
difference_type
|
||||
equally_split_point(difference_type n,
|
||||
thread_index_t num_threads,
|
||||
thread_index_t thread_no)
|
||||
{
|
||||
difference_type chunk_length = n / num_threads,
|
||||
num_longer_chunks = n % num_threads;
|
||||
|
||||
if(thread_no < num_longer_chunks)
|
||||
difference_type chunk_length = n / num_threads;
|
||||
difference_type num_longer_chunks = n % num_threads;
|
||||
if (thread_no < num_longer_chunks)
|
||||
return thread_no * (chunk_length + 1);
|
||||
else
|
||||
return num_longer_chunks * (chunk_length + 1)
|
||||
|
|
|
|||
|
|
@ -43,21 +43,21 @@
|
|||
#ifndef _GLIBCXX_MERGESORT
|
||||
/** @def _GLIBCXX_MERGESORT
|
||||
* @brief Include parallel multi-way mergesort.
|
||||
* @see __gnu_parallel::Settings::sort_algorithm */
|
||||
* @see __gnu_parallel::_Settings::sort_algorithm */
|
||||
#define _GLIBCXX_MERGESORT 1
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_QUICKSORT
|
||||
/** @def _GLIBCXX_QUICKSORT
|
||||
* @brief Include parallel unbalanced quicksort.
|
||||
* @see __gnu_parallel::Settings::sort_algorithm */
|
||||
* @see __gnu_parallel::_Settings::sort_algorithm */
|
||||
#define _GLIBCXX_QUICKSORT 1
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_BAL_QUICKSORT
|
||||
/** @def _GLIBCXX_BAL_QUICKSORT
|
||||
* @brief Include parallel dynamically load-balanced quicksort.
|
||||
* @see __gnu_parallel::Settings::sort_algorithm */
|
||||
* @see __gnu_parallel::_Settings::sort_algorithm */
|
||||
#define _GLIBCXX_BAL_QUICKSORT 1
|
||||
#endif
|
||||
|
||||
|
|
@ -65,7 +65,7 @@
|
|||
/** @def _GLIBCXX_LOSER_TREE
|
||||
* @brief Include guarded (sequences may run empty) loser tree,
|
||||
* moving objects.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE 1
|
||||
#endif
|
||||
|
||||
|
|
@ -73,21 +73,21 @@
|
|||
/** @def _GLIBCXX_LOSER_TREE_EXPLICIT
|
||||
* @brief Include standard loser tree, storing two flags for infimum
|
||||
* and supremum.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_EXPLICIT 0
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_LOSER_TREE_REFERENCE
|
||||
/** @def _GLIBCXX_LOSER_TREE_REFERENCE
|
||||
* @brief Include some loser tree variant.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_REFERENCE 0
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_LOSER_TREE_POINTER
|
||||
/** @def _GLIBCXX_LOSER_TREE_POINTER
|
||||
* @brief Include some loser tree variant.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_POINTER 1
|
||||
#endif
|
||||
|
||||
|
|
@ -95,48 +95,48 @@
|
|||
/** @def _GLIBCXX_LOSER_TREE_UNGUARDED
|
||||
* @brief Include unguarded (sequences must not run empty) loser
|
||||
* tree, moving objects.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_UNGUARDED 0
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
|
||||
/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
|
||||
* @brief Include some loser tree variant.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_LOSER_TREE_COMBINED
|
||||
/** @def _GLIBCXX_LOSER_TREE_COMBINED
|
||||
* @brief Include some loser tree variant.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_COMBINED 0
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_LOSER_TREE_SENTINEL
|
||||
/** @def _GLIBCXX_LOSER_TREE_SENTINEL
|
||||
* @brief Include some loser tree variant.
|
||||
* @see __gnu_parallel::Settings multiway_merge_algorithm */
|
||||
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
|
||||
#define _GLIBCXX_LOSER_TREE_SENTINEL 0
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef _GLIBCXX_FIND_GROWING_BLOCKS
|
||||
/** @brief Include the growing blocks variant for std::find.
|
||||
* @see __gnu_parallel::Settings::find_distribution */
|
||||
* @see __gnu_parallel::_Settings::find_algorithm */
|
||||
#define _GLIBCXX_FIND_GROWING_BLOCKS 1
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
|
||||
/** @brief Include the equal-sized blocks variant for std::find.
|
||||
* @see __gnu_parallel::Settings::find_distribution */
|
||||
* @see __gnu_parallel::_Settings::find_algorithm */
|
||||
#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1
|
||||
#endif
|
||||
|
||||
#ifndef _GLIBCXX_FIND_EQUAL_SPLIT
|
||||
/** @def _GLIBCXX_FIND_EQUAL_SPLIT
|
||||
* @brief Include the equal splitting variant for std::find.
|
||||
* @see __gnu_parallel::Settings::find_distribution */
|
||||
* @see __gnu_parallel::_Settings::find_algorithm */
|
||||
#define _GLIBCXX_FIND_EQUAL_SPLIT 1
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -66,15 +66,15 @@ template<typename RandomAccessIterator1,
|
|||
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
|
||||
RandomAccessIterator2 begin2, Pred pred, Selector selector)
|
||||
{
|
||||
switch (Settings::find_distribution)
|
||||
switch (_Settings::get().find_algorithm)
|
||||
{
|
||||
case Settings::GROWING_BLOCKS:
|
||||
case GROWING_BLOCKS:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
growing_blocks_tag());
|
||||
case Settings::CONSTANT_SIZE_BLOCKS:
|
||||
case CONSTANT_SIZE_BLOCKS:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
constant_size_blocks_tag());
|
||||
case Settings::EQUAL_SPLIT:
|
||||
case EQUAL_SPLIT:
|
||||
return find_template(begin1, end1, begin2, pred, selector,
|
||||
equal_split_tag());
|
||||
default:
|
||||
|
|
@ -176,10 +176,10 @@ template<typename RandomAccessIterator1,
|
|||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::Settings::find_initial_block_size
|
||||
* @see __gnu_parallel::Settings::find_maximum_block_size
|
||||
* @see __gnu_parallel::Settings::find_increasing_factor
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_initial_block_size
|
||||
* @see __gnu_parallel::_Settings::find_maximum_block_size
|
||||
* @see __gnu_parallel::_Settings::find_increasing_factor
|
||||
*
|
||||
* There are two main differences between the growing blocks and
|
||||
* the constant-size blocks variants.
|
||||
|
|
@ -204,10 +204,12 @@ template<typename RandomAccessIterator1,
|
|||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
difference_type length = end1 - begin1;
|
||||
|
||||
difference_type sequential_search_size =
|
||||
std::min<difference_type>(length, Settings::find_sequential_search_size);
|
||||
std::min<difference_type>(length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
|
||||
|
|
@ -233,7 +235,7 @@ template<typename RandomAccessIterator1,
|
|||
// Not within first k elements -> start parallel.
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
|
||||
difference_type block_size = Settings::find_initial_block_size;
|
||||
difference_type block_size = __s.find_initial_block_size;
|
||||
difference_type start =
|
||||
fetch_and_add<difference_type>(&next_block_start, block_size);
|
||||
|
||||
|
|
@ -269,9 +271,8 @@ template<typename RandomAccessIterator1,
|
|||
}
|
||||
|
||||
block_size =
|
||||
std::min<difference_type>(block_size
|
||||
* Settings::find_increasing_factor,
|
||||
Settings::find_maximum_block_size);
|
||||
std::min<difference_type>(block_size * __s.find_increasing_factor,
|
||||
__s.find_maximum_block_size);
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
start =
|
||||
|
|
@ -302,8 +303,8 @@ template<typename RandomAccessIterator1,
|
|||
* @param pred Find predicate.
|
||||
* @param selector Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::Settings::find_block_size
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_initial_block_size
|
||||
* There are two main differences between the growing blocks and the
|
||||
* constant-size blocks variants.
|
||||
* 1. For GB, the block size grows; for CSB, the block size is fixed.
|
||||
|
|
@ -325,10 +326,12 @@ template<typename RandomAccessIterator1,
|
|||
typedef typename traits_type::difference_type difference_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
difference_type length = end1 - begin1;
|
||||
|
||||
difference_type sequential_search_size = std::min<difference_type>(
|
||||
length, Settings::find_sequential_search_size);
|
||||
length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
|
||||
|
|
@ -351,7 +354,7 @@ template<typename RandomAccessIterator1,
|
|||
num_threads = omp_get_num_threads();
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
difference_type block_size = Settings::find_initial_block_size;
|
||||
difference_type block_size = __s.find_initial_block_size;
|
||||
|
||||
// First element of thread's current iteration.
|
||||
difference_type iteration_start = sequential_search_size;
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ namespace __gnu_parallel
|
|||
Result& output, typename
|
||||
std::iterator_traits<InputIterator>::
|
||||
difference_type bound,
|
||||
parallelism parallelism_tag)
|
||||
_Parallelism parallelism_tag)
|
||||
{
|
||||
if (parallelism_tag == parallel_unbalanced)
|
||||
return for_each_template_random_access_ed(begin, end, user_op,
|
||||
|
|
|
|||
|
|
@ -1359,11 +1359,10 @@ template<typename RandomAccessIteratorIterator,
|
|||
RandomAccessIterator3 return_target = target;
|
||||
int k = static_cast<int>(seqs_end - seqs_begin);
|
||||
|
||||
Settings::MultiwayMergeAlgorithm mwma =
|
||||
Settings::multiway_merge_algorithm;
|
||||
_MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
|
||||
|
||||
if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
|
||||
mwma = Settings::LOSER_TREE_COMBINED;
|
||||
if (!sentinel && mwma == LOSER_TREE_SENTINEL)
|
||||
mwma = LOSER_TREE_COMBINED;
|
||||
|
||||
switch (k)
|
||||
{
|
||||
|
|
@ -1385,14 +1384,14 @@ template<typename RandomAccessIteratorIterator,
|
|||
case 3:
|
||||
switch (mwma)
|
||||
{
|
||||
case Settings::LOSER_TREE_COMBINED:
|
||||
case LOSER_TREE_COMBINED:
|
||||
return_target = multiway_merge_3_combined(seqs_begin,
|
||||
seqs_end,
|
||||
target,
|
||||
comp, length,
|
||||
stable);
|
||||
break;
|
||||
case Settings::LOSER_TREE_SENTINEL:
|
||||
case LOSER_TREE_SENTINEL:
|
||||
return_target =
|
||||
multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
|
||||
seqs_end,
|
||||
|
|
@ -1413,13 +1412,13 @@ template<typename RandomAccessIteratorIterator,
|
|||
case 4:
|
||||
switch (mwma)
|
||||
{
|
||||
case Settings::LOSER_TREE_COMBINED:
|
||||
case LOSER_TREE_COMBINED:
|
||||
return_target = multiway_merge_4_combined(seqs_begin,
|
||||
seqs_end,
|
||||
target,
|
||||
comp, length, stable);
|
||||
break;
|
||||
case Settings::LOSER_TREE_SENTINEL:
|
||||
case LOSER_TREE_SENTINEL:
|
||||
return_target =
|
||||
multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
|
||||
seqs_end,
|
||||
|
|
@ -1440,14 +1439,14 @@ template<typename RandomAccessIteratorIterator,
|
|||
{
|
||||
switch (mwma)
|
||||
{
|
||||
case Settings::BUBBLE:
|
||||
case BUBBLE:
|
||||
return_target = multiway_merge_bubble(seqs_begin,
|
||||
seqs_end,
|
||||
target,
|
||||
comp, length, stable);
|
||||
break;
|
||||
#if _GLIBCXX_LOSER_TREE_EXPLICIT
|
||||
case Settings::LOSER_TREE_EXPLICIT:
|
||||
case LOSER_TREE_EXPLICIT:
|
||||
return_target = multiway_merge_loser_tree<
|
||||
LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
|
||||
seqs_end,
|
||||
|
|
@ -1457,7 +1456,7 @@ template<typename RandomAccessIteratorIterator,
|
|||
break;
|
||||
#endif
|
||||
#if _GLIBCXX_LOSER_TREE
|
||||
case Settings::LOSER_TREE:
|
||||
case LOSER_TREE:
|
||||
return_target = multiway_merge_loser_tree<
|
||||
LoserTree<value_type, Comparator> >(seqs_begin,
|
||||
seqs_end,
|
||||
|
|
@ -1467,7 +1466,7 @@ template<typename RandomAccessIteratorIterator,
|
|||
break;
|
||||
#endif
|
||||
#if _GLIBCXX_LOSER_TREE_COMBINED
|
||||
case Settings::LOSER_TREE_COMBINED:
|
||||
case LOSER_TREE_COMBINED:
|
||||
return_target = multiway_merge_loser_tree_combined(seqs_begin,
|
||||
seqs_end,
|
||||
target,
|
||||
|
|
@ -1476,7 +1475,7 @@ template<typename RandomAccessIteratorIterator,
|
|||
break;
|
||||
#endif
|
||||
#if _GLIBCXX_LOSER_TREE_SENTINEL
|
||||
case Settings::LOSER_TREE_SENTINEL:
|
||||
case LOSER_TREE_SENTINEL:
|
||||
return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
|
||||
seqs_end,
|
||||
target,
|
||||
|
|
@ -1550,6 +1549,7 @@ template<typename RandomAccessIteratorIterator,
|
|||
|
||||
thread_index_t num_threads = static_cast<thread_index_t>(
|
||||
std::min<difference_type>(get_max_threads(), total_length));
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
# pragma omp parallel num_threads (num_threads)
|
||||
{
|
||||
|
|
@ -1562,10 +1562,10 @@ template<typename RandomAccessIteratorIterator,
|
|||
for (int s = 0; s < num_threads; ++s)
|
||||
pieces[s].resize(k);
|
||||
|
||||
difference_type num_samples =
|
||||
Settings::merge_oversampling * num_threads;
|
||||
difference_type num_samples = __s.merge_oversampling
|
||||
* num_threads;
|
||||
|
||||
if (Settings::multiway_merge_splitting == Settings::SAMPLING)
|
||||
if (__s.multiway_merge_splitting == SAMPLING)
|
||||
{
|
||||
value_type* samples = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * k * num_samples));
|
||||
|
|
@ -1623,7 +1623,7 @@ template<typename RandomAccessIteratorIterator,
|
|||
}
|
||||
else
|
||||
{
|
||||
// (Settings::multiway_merge_splitting == Settings::EXACT).
|
||||
// (__s.multiway_merge_splitting == EXACT).
|
||||
std::vector<RandomAccessIterator1>* offsets =
|
||||
new std::vector<RandomAccessIterator1>[num_threads];
|
||||
std::vector<
|
||||
|
|
@ -1768,10 +1768,12 @@ template<typename RandomAccessIteratorPairIterator,
|
|||
if (seqs_begin == seqs_end)
|
||||
return target;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
RandomAccessIterator3 target_end;
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
|
||||
&& ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
|
||||
((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k)
|
||||
&& ((sequence_index_t)length >= __s.multiway_merge_minimal_n)))
|
||||
target_end = parallel_multiway_merge(seqs_begin, seqs_end,
|
||||
target, comp,
|
||||
static_cast<difference_type>(length),
|
||||
|
|
@ -1813,15 +1815,14 @@ template<typename RandomAccessIteratorPairIterator,
|
|||
|
||||
_GLIBCXX_CALL(seqs_end - seqs_begin)
|
||||
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
|
||||
&& ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
|
||||
return parallel_multiway_merge(
|
||||
seqs_begin, seqs_end,
|
||||
target, comp, static_cast<difference_type>(length), stable, true);
|
||||
const _Settings& __s = _Settings::get();
|
||||
const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k;
|
||||
const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n;
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
|
||||
return parallel_multiway_merge(seqs_begin, seqs_end, target, comp,
|
||||
length, stable, true);
|
||||
else
|
||||
return multiway_merge(seqs_begin, seqs_end,
|
||||
target, comp, length, stable,
|
||||
return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable,
|
||||
true, sequential_tag());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -130,8 +130,7 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
|
|||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
|
||||
num_samples =
|
||||
Settings::sort_mwms_oversampling * sd->num_threads - 1;
|
||||
num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
|
||||
|
||||
difference_type* es = new difference_type[num_samples + 2];
|
||||
|
||||
|
|
@ -194,8 +193,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
|
||||
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
|
||||
// sd->sorting_places[iam] + length_local.
|
||||
|
||||
if (Settings::sort_splitting == Settings::SAMPLING)
|
||||
const _Settings& __s = _Settings::get();
|
||||
if (__s.sort_splitting == SAMPLING)
|
||||
{
|
||||
difference_type num_samples;
|
||||
determine_samples(sd, num_samples);
|
||||
|
|
@ -237,7 +236,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
|
||||
}
|
||||
}
|
||||
else if (Settings::sort_splitting == Settings::EXACT)
|
||||
else if (__s.sort_splitting == EXACT)
|
||||
{
|
||||
# pragma omp barrier
|
||||
|
||||
|
|
@ -355,6 +354,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
// shared variables
|
||||
PMWMSSortingData<RandomAccessIterator> sd;
|
||||
difference_type* starts;
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
{
|
||||
|
|
@ -374,10 +374,10 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
sd.merging_places = new RandomAccessIterator[num_threads];
|
||||
#endif
|
||||
|
||||
if (Settings::sort_splitting == Settings::SAMPLING)
|
||||
if (__s.sort_splitting == SAMPLING)
|
||||
{
|
||||
unsigned int size =
|
||||
(Settings::sort_mwms_oversampling * num_threads - 1)
|
||||
(__s.sort_mwms_oversampling * num_threads - 1)
|
||||
* num_threads;
|
||||
sd.samples = static_cast<value_type*>(
|
||||
::operator new(size * sizeof(value_type)));
|
||||
|
|
@ -412,7 +412,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
delete[] sd.sorting_places;
|
||||
delete[] sd.merging_places;
|
||||
|
||||
if (Settings::sort_splitting == Settings::SAMPLING)
|
||||
if (__s.sort_splitting == SAMPLING)
|
||||
::operator delete(sd.samples);
|
||||
|
||||
delete[] sd.offsets;
|
||||
|
|
|
|||
|
|
@ -91,12 +91,12 @@ namespace __parallel
|
|||
accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end,
|
||||
T init, BinaryOperation binary_op,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::accumulate_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().accumulate_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
T res = init;
|
||||
|
|
@ -121,7 +121,7 @@ namespace __parallel
|
|||
template<typename InputIterator, typename T>
|
||||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef std::iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::value_type value_type;
|
||||
|
|
@ -149,7 +149,7 @@ namespace __parallel
|
|||
inline T
|
||||
accumulate(InputIterator begin, InputIterator end, T init,
|
||||
BinaryOperation binary_op,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> iterator_traits;
|
||||
typedef typename iterator_traits::iterator_category iterator_category;
|
||||
|
|
@ -197,11 +197,11 @@ namespace __parallel
|
|||
BinaryFunction2 binary_op2,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_unbalanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1)
|
||||
>= __gnu_parallel::Settings::
|
||||
>= __gnu_parallel::_Settings::get().
|
||||
accumulate_minimal_n
|
||||
&& __gnu_parallel::
|
||||
is_parallel(parallelism_tag)))
|
||||
|
|
@ -241,7 +241,7 @@ namespace __parallel
|
|||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init, BinaryFunction1 binary_op1,
|
||||
BinaryFunction2 binary_op2,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits1_type;
|
||||
typedef typename traits1_type::iterator_category iterator1_category;
|
||||
|
|
@ -276,7 +276,7 @@ namespace __parallel
|
|||
inline T
|
||||
inner_product(InputIterator1 first1, InputIterator1 last1,
|
||||
InputIterator2 first2, T init,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator1> traits_type1;
|
||||
typedef typename traits_type1::value_type value_type1;
|
||||
|
|
@ -347,7 +347,7 @@ namespace __parallel
|
|||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::partial_sum_minimal_n))
|
||||
>= __gnu_parallel::_Settings::get().partial_sum_minimal_n))
|
||||
return __gnu_parallel::parallel_partial_sum(begin, end,
|
||||
result, bin_op);
|
||||
else
|
||||
|
|
@ -416,12 +416,12 @@ namespace __parallel
|
|||
OutputIterator result, BinaryOperation bin_op,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism parallelism_tag
|
||||
__gnu_parallel::_Parallelism parallelism_tag
|
||||
= __gnu_parallel::parallel_balanced)
|
||||
{
|
||||
if (_GLIBCXX_PARALLEL_CONDITION(
|
||||
static_cast<__gnu_parallel::sequence_index_t>(end - begin)
|
||||
>= __gnu_parallel::Settings::adjacent_difference_minimal_n
|
||||
>= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n
|
||||
&& __gnu_parallel::is_parallel(parallelism_tag)))
|
||||
{
|
||||
bool dummy = true;
|
||||
|
|
@ -448,7 +448,7 @@ namespace __parallel
|
|||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
|
|
@ -471,7 +471,7 @@ namespace __parallel
|
|||
inline OutputIterator
|
||||
adjacent_difference(InputIterator begin, InputIterator end,
|
||||
OutputIterator result, BinaryOperation binary_op,
|
||||
__gnu_parallel::parallelism parallelism_tag)
|
||||
__gnu_parallel::_Parallelism parallelism_tag)
|
||||
{
|
||||
typedef iterator_traits<InputIterator> traitsi_type;
|
||||
typedef typename traitsi_type::iterator_category iteratori_category;
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ namespace __parallel
|
|||
|
||||
template<typename _IIter, typename _Tp>
|
||||
_Tp
|
||||
accumulate(_IIter, _IIter, _Tp, __gnu_parallel::parallelism);
|
||||
accumulate(_IIter, _IIter, _Tp, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _Tag>
|
||||
_Tp
|
||||
|
|
@ -72,7 +72,7 @@ namespace __parallel
|
|||
template<typename _IIter, typename _Tp, typename _BinaryOper>
|
||||
_Tp
|
||||
accumulate(_IIter, _IIter, _Tp, _BinaryOper,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOper,
|
||||
typename _Tag>
|
||||
|
|
@ -83,7 +83,7 @@ namespace __parallel
|
|||
_Tp
|
||||
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _OIter>
|
||||
_OIter
|
||||
|
|
@ -106,12 +106,12 @@ namespace __parallel
|
|||
template<typename _IIter, typename _OIter>
|
||||
_OIter
|
||||
adjacent_difference(_IIter, _IIter, _OIter,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename _BinaryOper>
|
||||
_OIter
|
||||
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter, typename _OIter, typename _BinaryOper,
|
||||
typename _Tag1, typename _Tag2>
|
||||
|
|
@ -124,7 +124,7 @@ namespace __parallel
|
|||
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
_Tp
|
||||
|
|
@ -138,7 +138,7 @@ namespace __parallel
|
|||
template<typename _IIter1, typename _IIter2, typename _Tp>
|
||||
_Tp
|
||||
inner_product(_IIter1, _IIter1, _IIter2, _Tp,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename _BinaryFunction1, typename _BinaryFunction2>
|
||||
|
|
@ -156,7 +156,7 @@ namespace __parallel
|
|||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
_Tp
|
||||
inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1,
|
||||
BinaryFunction2, __gnu_parallel::parallelism);
|
||||
BinaryFunction2, __gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _RAIter1, typename _RAIter2, typename _Tp,
|
||||
typename BinaryFunction1, typename BinaryFunction2>
|
||||
|
|
@ -164,7 +164,7 @@ namespace __parallel
|
|||
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
|
||||
BinaryFunction2, random_access_iterator_tag,
|
||||
random_access_iterator_tag,
|
||||
__gnu_parallel::parallelism);
|
||||
__gnu_parallel::_Parallelism);
|
||||
|
||||
template<typename _IIter1, typename _IIter2, typename _Tp,
|
||||
typename _BinaryFunction1, typename _BinaryFunction2,
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ template<typename RandomAccessIterator,
|
|||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
|
||||
# pragma omp for schedule(dynamic, Settings::workstealing_chunk_size)
|
||||
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
|
||||
for (difference_type pos = 0; pos < length; ++pos)
|
||||
thread_results[iam] =
|
||||
r(thread_results[iam], f(o, begin+pos));
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ template<typename RandomAccessIterator,
|
|||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
|
||||
# pragma omp for schedule(static, Settings::workstealing_chunk_size)
|
||||
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
|
||||
for (difference_type pos = 0; pos < length; ++pos)
|
||||
thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
|
||||
} //parallel
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ template<typename InputIterator,
|
|||
difference_type* borders;
|
||||
value_type* sums;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
# pragma omp parallel num_threads(num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
|
|
@ -126,14 +128,13 @@ template<typename InputIterator,
|
|||
|
||||
borders = new difference_type[num_threads + 2];
|
||||
|
||||
if (Settings::partial_sum_dilatation == 1.0f)
|
||||
if (__s.partial_sum_dilation == 1.0f)
|
||||
equally_split(n, num_threads + 1, borders);
|
||||
else
|
||||
{
|
||||
difference_type chunk_length =
|
||||
((double)n
|
||||
/ ((double)num_threads
|
||||
+ Settings::partial_sum_dilatation)),
|
||||
/ ((double)num_threads + __s.partial_sum_dilation)),
|
||||
borderstart = n - num_threads * chunk_length;
|
||||
borders[0] = 0;
|
||||
for (int i = 1; i < (num_threads + 1); ++i)
|
||||
|
|
@ -209,9 +210,9 @@ template<typename InputIterator,
|
|||
|
||||
difference_type n = end - begin;
|
||||
|
||||
switch (Settings::partial_sum_algorithm)
|
||||
switch (_Settings::get().partial_sum_algorithm)
|
||||
{
|
||||
case Settings::LINEAR:
|
||||
case LINEAR:
|
||||
// Need an initial offset.
|
||||
return parallel_partial_sum_linear(begin, end, result, bin_op, n);
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ template<typename RandomAccessIterator, typename Predicate>
|
|||
|
||||
_GLIBCXX_CALL(n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
// Shared.
|
||||
_GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
|
||||
_GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
|
||||
|
|
@ -91,14 +93,12 @@ template<typename RandomAccessIterator, typename Predicate>
|
|||
reserved_left = new bool[num_threads];
|
||||
reserved_right = new bool[num_threads];
|
||||
|
||||
if (Settings::partition_chunk_share > 0.0)
|
||||
chunk_size = std::max<difference_type>(Settings::
|
||||
partition_chunk_size,
|
||||
(double)n * Settings::
|
||||
partition_chunk_share
|
||||
if (__s.partition_chunk_share > 0.0)
|
||||
chunk_size = std::max<difference_type>(__s.partition_chunk_size,
|
||||
(double)n * __s.partition_chunk_share
|
||||
/ (double)num_threads);
|
||||
else
|
||||
chunk_size = Settings::partition_chunk_size;
|
||||
chunk_size = __s.partition_chunk_size;
|
||||
}
|
||||
|
||||
while (right - left + 1 >= 2 * num_threads * chunk_size)
|
||||
|
|
@ -346,7 +346,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
random_number rng;
|
||||
|
||||
difference_type minimum_length =
|
||||
std::max<difference_type>(2, Settings::partition_minimal_n);
|
||||
std::max<difference_type>(2, _Settings::get().partition_minimal_n);
|
||||
|
||||
// Break if input range too small.
|
||||
while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
|
||||
|
|
@ -409,7 +409,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
break;
|
||||
}
|
||||
|
||||
// Only at most Settings::partition_minimal_n elements left.
|
||||
// Only at most _Settings::partition_minimal_n elements left.
|
||||
__gnu_sequential::sort(begin, end, comp);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ namespace __gnu_parallel
|
|||
|
||||
difference_type split =
|
||||
parallel_sort_qs_divide(begin, end, comp, pivot_rank,
|
||||
Settings::sort_qs_num_samples_preset,
|
||||
_Settings::get().sort_qs_num_samples_preset,
|
||||
num_threads);
|
||||
|
||||
#pragma omp parallel sections
|
||||
|
|
@ -179,8 +179,6 @@ namespace __gnu_parallel
|
|||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
|
||||
Settings::sort_qs_num_samples_preset = 100;
|
||||
|
||||
// Hard to avoid.
|
||||
omp_set_num_threads(num_threads);
|
||||
|
||||
|
|
|
|||
|
|
@ -274,6 +274,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
|
||||
_GLIBCXX_CALL(n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
if (num_threads > n)
|
||||
num_threads = static_cast<thread_index_t>(n);
|
||||
|
||||
|
|
@ -284,7 +286,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
|
||||
// Must fit into L1.
|
||||
num_bins_cache = std::max<difference_type>(
|
||||
1, n / (Settings::L1_cache_size_lb / sizeof(value_type)));
|
||||
1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
|
|
@ -293,7 +295,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
num_bins = std::min<difference_type>(Settings::TLB_size / 2, num_bins);
|
||||
num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
|
||||
|
|
@ -303,7 +305,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
// Now try the L2 cache
|
||||
// Must fit into L2
|
||||
num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
|
||||
1, n / (Settings::L2_cache_size / sizeof(value_type))));
|
||||
1, n / (__s.L2_cache_size / sizeof(value_type))));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2.
|
||||
|
|
@ -313,7 +315,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
num_bins = std::min(
|
||||
static_cast<difference_type>(Settings::TLB_size / 2), num_bins);
|
||||
static_cast<difference_type>(__s.TLB_size / 2), num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
|
|
@ -403,6 +405,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
typedef typename traits_type::difference_type difference_type;
|
||||
|
||||
difference_type n = end - begin;
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
bin_index num_bins, num_bins_cache;
|
||||
|
||||
|
|
@ -410,7 +413,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
// Try the L1 cache first, must fit into L1.
|
||||
num_bins_cache =
|
||||
std::max<difference_type>
|
||||
(1, n / (Settings::L1_cache_size_lb / sizeof(value_type)));
|
||||
(1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
|
|
@ -418,7 +421,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
num_bins = std::min(n, (difference_type)num_bins_cache);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins);
|
||||
num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
|
||||
|
|
@ -428,7 +431,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
// Now try the L2 cache, must fit into L2.
|
||||
num_bins_cache =
|
||||
static_cast<bin_index>(std::max<difference_type>(
|
||||
1, n / (Settings::L2_cache_size / sizeof(value_type))));
|
||||
1, n / (__s.L2_cache_size / sizeof(value_type))));
|
||||
num_bins_cache = round_up_to_pow2(num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
|
|
@ -439,7 +442,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
num_bins =
|
||||
std::min<difference_type>(Settings::TLB_size / 2, num_bins);
|
||||
std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
||||
#endif
|
||||
num_bins = round_up_to_pow2(num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
|
|
|
|||
|
|
@ -29,65 +29,59 @@
|
|||
// Public License.
|
||||
|
||||
/** @file parallel/settings.h
|
||||
* @brief Settings and tuning parameters, heuristics to decide
|
||||
* @brief Runtime settings and tuning parameters, heuristics to decide
|
||||
* whether to use parallelized algorithms.
|
||||
* This file is a GNU parallel extension to the Standard C++ Library.
|
||||
*
|
||||
* @section parallelization_decision The decision whether to run
|
||||
* an algorithm in parallel.
|
||||
* @section parallelization_decision
|
||||
* The decision whether to run an algorithm in parallel.
|
||||
*
|
||||
* There are several ways the user can switch on and off the
|
||||
* parallel execution of an algorithm, both at compile- and
|
||||
* run-time.
|
||||
* There are several ways the user can switch on and off the parallel
|
||||
* execution of an algorithm, both at compile- and run-time.
|
||||
*
|
||||
* Only sequential execution can be forced at compile-time.
|
||||
* This reduces code size and protects code parts that have
|
||||
* Only sequential execution can be forced at compile-time. This
|
||||
* reduces code size and protects code parts that have
|
||||
* non-thread-safe side effects.
|
||||
*
|
||||
* Ultimately forcing parallel execution at compile-time does
|
||||
* make much sense.
|
||||
* Often, the sequential algorithm implementation is used as
|
||||
* a subroutine, so no reduction in code size can be achieved.
|
||||
* Also, the machine the program is run on might have only one
|
||||
* processor core, so to avoid overhead, the algorithm is
|
||||
* executed sequentially.
|
||||
* Ultimately, forcing parallel execution at compile-time makes little
|
||||
* sense. Often, the sequential algorithm implementation is used as
|
||||
* a subroutine, so no reduction in code size can be achieved. Also,
|
||||
* the machine the program is run on might have only one processor
|
||||
* core, so to avoid overhead, the algorithm is executed
|
||||
* sequentially.
|
||||
*
|
||||
* To force sequential execution of an algorithm ultimately
|
||||
* at compile-time, the user must add the tag
|
||||
* __gnu_parallel::sequential_tag() to the end of the
|
||||
* parameter list, e. g.
|
||||
* To force sequential execution of an algorithm ultimately at
|
||||
* compile-time, the user must add the tag
|
||||
* __gnu_parallel::sequential_tag() to the end of the parameter list,
|
||||
* e. g.
|
||||
*
|
||||
* \code
|
||||
* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
|
||||
* \endcode
|
||||
*
|
||||
* This is compatible with all overloaded algorithm variants.
|
||||
* No additional code will be instantiated, at all.
|
||||
* The same holds for most algorithm calls with iterators
|
||||
* not providing random access.
|
||||
* This is compatible with all overloaded algorithm variants. No
|
||||
* additional code will be instantiated, at all. The same holds for
|
||||
* most algorithm calls with iterators not providing random access.
|
||||
*
|
||||
* If the algorithm call is not forced to be executed sequentially
|
||||
* at compile-time, the decision is made at run-time, for each call.
|
||||
* First, the two (conceptually) global variables
|
||||
* __gnu_parallel::Settings::force_sequential and
|
||||
* __gnu_parallel::Settings::force_parallel are executed.
|
||||
* If the former one is true, the sequential algorithm is executed.
|
||||
* If the latter one is true and the former one is false,
|
||||
* the algorithm is executed in parallel.
|
||||
* at compile-time, the decision is made at run-time.
|
||||
* The global variable __gnu_parallel::_Settings::algorithm_strategy
|
||||
* is checked. It is a tristate variable corresponding to:
|
||||
*
|
||||
* If none of these conditions has fired so far, a heuristic is used.
|
||||
* The parallel algorithm implementation is called only if the
|
||||
* input size is sufficiently large.
|
||||
* For most algorithms, the input size is the (combined) length of
|
||||
* the input sequence(s).
|
||||
* The threshold can be set by the user, individually for each
|
||||
* algorithm.
|
||||
* The according variables are called
|
||||
* __gnu_parallel::Settings::[algorithm]_minimal_n .
|
||||
* a. force_sequential, meaning the sequential algorithm is executed.
|
||||
* b. force_parallel, meaning the parallel algorithm is executed.
|
||||
* c. heuristic
|
||||
*
|
||||
* For heuristic, the parallel algorithm implementation is called
|
||||
* only if the input size is sufficiently large. For most
|
||||
* algorithms, the input size is the (combined) length of the input
|
||||
* sequence(s). The threshold can be set by the user, individually
|
||||
* for each algorithm. The corresponding variables are called
|
||||
* __gnu_parallel::_Settings::[algorithm]_minimal_n .
|
||||
*
|
||||
* For some of the algorithms, there are even more tuning options,
|
||||
* e. g. the ability to choose from multiple algorithm variants.
|
||||
* See the __gnu_parallel::Settings class for details.
|
||||
* e. g. the ability to choose from multiple algorithm variants. See
|
||||
* below for details.
|
||||
*/
|
||||
|
||||
// Written by Johannes Singler and Felix Putze.
|
||||
|
|
@ -95,306 +89,199 @@
|
|||
#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
|
||||
#define _GLIBCXX_PARALLEL_SETTINGS_H 1
|
||||
|
||||
#include <omp.h>
|
||||
#include <parallel/types.h>
|
||||
|
||||
/**
|
||||
* @brief The extensible condition on whether the parallel variant of
|
||||
* an algorithm should be called.
|
||||
* @param c A condition that is overruled by
|
||||
* __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
|
||||
* the input size.
|
||||
* @brief Determine at run-time if the parallel variant of an
|
||||
* algorithm should be called.
|
||||
* @param c A condition that is convertible to bool that is overruled by
|
||||
* __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
|
||||
* based on the input size.
|
||||
*/
|
||||
#define _GLIBCXX_PARALLEL_CONDITION(c) \
|
||||
(!(__gnu_parallel::Settings::force_sequential) \
|
||||
&& ((__gnu_parallel::get_max_threads() > 1 \
|
||||
&& (c)) || __gnu_parallel::Settings::force_parallel))
|
||||
#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
|
||||
|
||||
/*
|
||||
inline bool
|
||||
parallel_condition(bool c)
|
||||
{
|
||||
bool ret = false;
|
||||
const _Settings& s = _Settings::get();
|
||||
if (s.algorithm_strategy != force_sequential)
|
||||
{
|
||||
if (s.algorithm_strategy == force_parallel)
|
||||
ret = true;
|
||||
else
|
||||
ret = get_max_threads() > 1 && c;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
// NB: Including this file cannot produce (unresolved) symbols from
|
||||
// the OpenMP runtime unless the parallel mode is actually invoked
|
||||
// and active, which implies that the OpenMP runtime is actually
|
||||
// going to be linked in.
|
||||
inline int
|
||||
get_max_threads()
|
||||
{ return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; }
|
||||
|
||||
namespace
|
||||
{
|
||||
// XXX look at _Tune in mt_allocator.h
|
||||
/** @brief Run-time settings for the parallel mode. */
|
||||
struct Settings
|
||||
/// class _Settings
|
||||
/// Run-time settings for the parallel mode, including all tunable parameters.
|
||||
struct _Settings
|
||||
{
|
||||
/** @brief Different parallel sorting algorithms to choose
|
||||
from: multi-way mergesort, quicksort, load-balanced
|
||||
quicksort. */
|
||||
enum SortAlgorithm
|
||||
{ MWMS, QS, QS_BALANCED };
|
||||
_AlgorithmStrategy algorithm_strategy;
|
||||
|
||||
_SortAlgorithm sort_algorithm;
|
||||
_PartialSumAlgorithm partial_sum_algorithm;
|
||||
_MultiwayMergeAlgorithm multiway_merge_algorithm;
|
||||
_FindAlgorithm find_algorithm;
|
||||
|
||||
/** @brief Different merging algorithms: bubblesort-alike,
|
||||
loser-tree variants, enum sentinel */
|
||||
enum MultiwayMergeAlgorithm
|
||||
{ BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED,
|
||||
LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };
|
||||
_SplittingAlgorithm sort_splitting;
|
||||
_SplittingAlgorithm merge_splitting;
|
||||
_SplittingAlgorithm multiway_merge_splitting;
|
||||
|
||||
/** @brief Different splitting strategies for sorting/merging:
|
||||
by sampling, exact */
|
||||
enum Splitting
|
||||
{ SAMPLING, EXACT };
|
||||
// Per-algorithm settings.
|
||||
|
||||
/** @brief Different partial sum algorithms: recursive, linear */
|
||||
enum PartialSumAlgorithm
|
||||
{ RECURSIVE, LINEAR };
|
||||
/// Minimal input size for accumulate.
|
||||
sequence_index_t accumulate_minimal_n;
|
||||
|
||||
/** @brief Different find distribution strategies: growing
|
||||
blocks, equal-sized blocks, equal splitting. */
|
||||
enum FindDistribution
|
||||
{ GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT };
|
||||
/// Minimal input size for adjacent_difference.
|
||||
unsigned int adjacent_difference_minimal_n;
|
||||
|
||||
/** @brief Force all algorithms to be executed sequentially.
|
||||
* This setting cannot be overwritten. */
|
||||
static volatile bool force_sequential;
|
||||
/// Minimal input size for count and count_if.
|
||||
sequence_index_t count_minimal_n;
|
||||
|
||||
/** @brief Force all algorithms to be executed in parallel.
|
||||
* This setting can be overridden by __gnu_parallel::sequential_tag
|
||||
* (compile-time), and force_sequential (run-time). */
|
||||
static volatile bool force_parallel;
|
||||
/// Minimal input size for fill.
|
||||
sequence_index_t fill_minimal_n;
|
||||
|
||||
/** @brief Algorithm to use for sorting. */
|
||||
static volatile SortAlgorithm sort_algorithm;
|
||||
/// Block size increase factor for find.
|
||||
double find_increasing_factor;
|
||||
|
||||
/** @brief Strategy to use for splitting the input when
|
||||
sorting (MWMS). */
|
||||
static volatile Splitting sort_splitting;
|
||||
/// Initial block size for find.
|
||||
sequence_index_t find_initial_block_size;
|
||||
|
||||
/** @brief Minimal input size for parallel sorting. */
|
||||
static volatile sequence_index_t sort_minimal_n;
|
||||
/// Maximal block size for find.
|
||||
sequence_index_t find_maximum_block_size;
|
||||
|
||||
/** @brief Oversampling factor for parallel std::sort (MWMS). */
|
||||
static volatile unsigned int sort_mwms_oversampling;
|
||||
/// Start with looking for this many elements sequentially, for find.
|
||||
sequence_index_t find_sequential_search_size;
|
||||
|
||||
/** @brief Such many samples to take to find a good pivot
|
||||
(quicksort). */
|
||||
static volatile unsigned int sort_qs_num_samples_preset;
|
||||
/// Minimal input size for for_each.
|
||||
sequence_index_t for_each_minimal_n;
|
||||
|
||||
/** @brief Maximal subsequence length to switch to unbalanced
|
||||
* base case. Applies to std::sort with dynamically
|
||||
* load-balanced quicksort. */
|
||||
static volatile sequence_index_t sort_qsb_base_case_maximal_n;
|
||||
/// Minimal input size for generate.
|
||||
sequence_index_t generate_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::partition. */
|
||||
static volatile sequence_index_t partition_minimal_n;
|
||||
/// Minimal input size for max_element.
|
||||
sequence_index_t max_element_minimal_n;
|
||||
|
||||
/** @brief Chunk size for parallel std::partition. */
|
||||
static volatile sequence_index_t partition_chunk_size;
|
||||
/// Minimal input size for merge.
|
||||
sequence_index_t merge_minimal_n;
|
||||
|
||||
/** @brief Chunk size for parallel std::partition, relative to
|
||||
* input size. If >0.0, this value overrides
|
||||
* partition_chunk_size. */
|
||||
static volatile double partition_chunk_share;
|
||||
/// Oversampling factor for merge.
|
||||
unsigned int merge_oversampling;
|
||||
|
||||
/** @brief Minimal input size for parallel std::nth_element. */
|
||||
static volatile sequence_index_t nth_element_minimal_n;
|
||||
/// Minimal input size for min_element.
|
||||
sequence_index_t min_element_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::partial_sort. */
|
||||
static volatile sequence_index_t partial_sort_minimal_n;
|
||||
/// Minimal input size for multiway_merge.
|
||||
sequence_index_t multiway_merge_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::adjacent_difference. */
|
||||
static volatile unsigned int adjacent_difference_minimal_n;
|
||||
/// Minimal k for multiway_merge (presumably the minimal number of input sequences; TODO confirm).
|
||||
int multiway_merge_minimal_k;
|
||||
|
||||
/** @brief Minimal input size for parallel std::partial_sum. */
|
||||
static volatile unsigned int partial_sum_minimal_n;
|
||||
/// Oversampling factor for multiway_merge.
|
||||
unsigned int multiway_merge_oversampling;
|
||||
|
||||
/** @brief Algorithm to use for std::partial_sum. */
|
||||
static volatile PartialSumAlgorithm partial_sum_algorithm;
|
||||
/// Minimal input size for nth_element.
|
||||
sequence_index_t nth_element_minimal_n;
|
||||
|
||||
/** @brief Assume "sum and write result" to be that factor
|
||||
* slower than just "sum". This value is used for
|
||||
* std::partial_sum. */
|
||||
static volatile float partial_sum_dilatation;
|
||||
/// Chunk size for partition.
|
||||
sequence_index_t partition_chunk_size;
|
||||
|
||||
/** @brief Minimal input size for parallel std::random_shuffle. */
|
||||
static volatile unsigned int random_shuffle_minimal_n;
|
||||
/// Chunk size for partition, relative to input size. If > 0.0,
|
||||
/// this value overrides partition_chunk_size.
|
||||
double partition_chunk_share;
|
||||
|
||||
/** @brief Minimal input size for parallel std::merge. */
|
||||
static volatile sequence_index_t merge_minimal_n;
|
||||
/// Minimal input size for partition.
|
||||
sequence_index_t partition_minimal_n;
|
||||
|
||||
/** @brief Splitting strategy for parallel std::merge. */
|
||||
static volatile Splitting merge_splitting;
|
||||
/// Minimal input size for partial_sort.
|
||||
sequence_index_t partial_sort_minimal_n;
|
||||
|
||||
/** @brief Oversampling factor for parallel std::merge.
|
||||
* Such many samples per thread are collected. */
|
||||
static volatile unsigned int merge_oversampling;
|
||||
/// Ratio for partial_sum. Assume "sum and write result" to be
|
||||
/// this factor slower than just "sum".
|
||||
float partial_sum_dilation;
|
||||
|
||||
/** @brief Algorithm to use for parallel
|
||||
__gnu_parallel::multiway_merge. */
|
||||
static volatile MultiwayMergeAlgorithm multiway_merge_algorithm;
|
||||
/// Minimal input size for partial_sum.
|
||||
unsigned int partial_sum_minimal_n;
|
||||
|
||||
/** @brief Splitting strategy to use for parallel
|
||||
__gnu_parallel::multiway_merge. */
|
||||
static volatile Splitting multiway_merge_splitting;
|
||||
/// Minimal input size for random_shuffle.
|
||||
unsigned int random_shuffle_minimal_n;
|
||||
|
||||
//// Oversampling factor for parallel __gnu_parallel::multiway_merge.
|
||||
static volatile unsigned int multiway_merge_oversampling;
|
||||
/// Minimal input size for replace and replace_if.
|
||||
sequence_index_t replace_minimal_n;
|
||||
|
||||
/// Minimal input size for parallel __gnu_parallel::multiway_merge.
|
||||
static volatile sequence_index_t multiway_merge_minimal_n;
|
||||
/// Minimal input size for set_difference.
|
||||
sequence_index_t set_difference_minimal_n;
|
||||
|
||||
/// Oversampling factor for parallel __gnu_parallel::multiway_merge.
|
||||
static volatile int multiway_merge_minimal_k;
|
||||
/// Minimal input size for set_intersection.
|
||||
sequence_index_t set_intersection_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::unique_copy. */
|
||||
static volatile sequence_index_t unique_copy_minimal_n;
|
||||
/// Minimal input size for set_symmetric_difference.
|
||||
sequence_index_t set_symmetric_difference_minimal_n;
|
||||
|
||||
static volatile sequence_index_t workstealing_chunk_size;
|
||||
/// Minimal input size for set_union.
|
||||
sequence_index_t set_union_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::for_each. */
|
||||
static volatile sequence_index_t for_each_minimal_n;
|
||||
/// Minimal input size for parallel sorting.
|
||||
sequence_index_t sort_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::count and
|
||||
std::count_if. */
|
||||
static volatile sequence_index_t count_minimal_n;
|
||||
/// Oversampling factor for parallel std::sort (MWMS).
|
||||
unsigned int sort_mwms_oversampling;
|
||||
|
||||
/** @brief Minimal input size for parallel std::transform. */
|
||||
static volatile sequence_index_t transform_minimal_n;
|
||||
/// Number of samples to take to find a good pivot (quicksort).
|
||||
unsigned int sort_qs_num_samples_preset;
|
||||
|
||||
/** @brief Minimal input size for parallel std::replace and
|
||||
std::replace_if. */
|
||||
static volatile sequence_index_t replace_minimal_n;
|
||||
/// Maximal subsequence length to switch to unbalanced base case.
|
||||
/// Applies to std::sort with dynamically load-balanced quicksort.
|
||||
sequence_index_t sort_qsb_base_case_maximal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::generate. */
|
||||
static volatile sequence_index_t generate_minimal_n;
|
||||
/// Minimal input size for parallel std::transform.
|
||||
sequence_index_t transform_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::fill. */
|
||||
static volatile sequence_index_t fill_minimal_n;
|
||||
/// Minimal input size for unique_copy.
|
||||
sequence_index_t unique_copy_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::min_element. */
|
||||
static volatile sequence_index_t min_element_minimal_n;
|
||||
sequence_index_t workstealing_chunk_size;
|
||||
|
||||
/** @brief Minimal input size for parallel std::max_element. */
|
||||
static volatile sequence_index_t max_element_minimal_n;
|
||||
// Hardware dependent tuning parameters.
|
||||
|
||||
/** @brief Minimal input size for parallel std::accumulate. */
|
||||
static volatile sequence_index_t accumulate_minimal_n;
|
||||
/// Size of the L1 cache in bytes (underestimation).
|
||||
unsigned long long L1_cache_size;
|
||||
|
||||
/** @brief Distribution strategy for parallel std::find. */
|
||||
static volatile FindDistribution find_distribution;
|
||||
/// Size of the L2 cache in bytes (underestimation).
|
||||
unsigned long long L2_cache_size;
|
||||
|
||||
/** @brief Start with looking for that many elements
|
||||
sequentially, for std::find. */
|
||||
static volatile sequence_index_t find_sequential_search_size;
|
||||
/// Size of the Translation Lookaside Buffer (underestimation).
|
||||
unsigned int TLB_size;
|
||||
|
||||
/** @brief Initial block size for parallel std::find. */
|
||||
static volatile sequence_index_t find_initial_block_size;
|
||||
/// Overestimation of cache line size. Used to avoid false
|
||||
/// sharing, i. e. elements of different threads are at least this
|
||||
/// amount apart.
|
||||
unsigned int cache_line_size;
|
||||
|
||||
/** @brief Maximal block size for parallel std::find. */
|
||||
static volatile sequence_index_t find_maximum_block_size;
|
||||
// Statistics.
|
||||
|
||||
/** @brief Block size increase factor for parallel std::find. */
|
||||
static volatile double find_increasing_factor;
|
||||
/// The number of stolen ranges in load-balanced quicksort.
|
||||
sequence_index_t qsb_steals;
|
||||
|
||||
//set operations
|
||||
/** @brief Minimal input size for parallel std::set_union. */
|
||||
static volatile sequence_index_t set_union_minimal_n;
|
||||
/// Get the global settings.
|
||||
static const _Settings&
|
||||
get() throw();
|
||||
|
||||
/** @brief Minimal input size for parallel
|
||||
std::set_symmetric_difference. */
|
||||
static volatile sequence_index_t set_symmetric_difference_minimal_n;
|
||||
/// Set the global settings.
|
||||
static void
|
||||
set(_Settings&) throw();
|
||||
|
||||
/** @brief Minimal input size for parallel std::set_difference. */
|
||||
static volatile sequence_index_t set_difference_minimal_n;
|
||||
|
||||
/** @brief Minimal input size for parallel std::set_intersection. */
|
||||
static volatile sequence_index_t set_intersection_minimal_n;
|
||||
|
||||
//hardware dependent tuning parameters
|
||||
/** @brief Size of the L1 cache in bytes (underestimation). */
|
||||
static volatile unsigned long long L1_cache_size;
|
||||
|
||||
/** @brief Size of the L2 cache in bytes (underestimation). */
|
||||
static volatile unsigned long long L2_cache_size;
|
||||
|
||||
/** @brief Size of the Translation Lookaside Buffer
|
||||
(underestimation). */
|
||||
static volatile unsigned int TLB_size;
|
||||
|
||||
/** @brief Overestimation of cache line size. Used to avoid
|
||||
* false sharing, i. e. elements of different threads are at
|
||||
* least this amount apart. */
|
||||
static unsigned int cache_line_size;
|
||||
|
||||
//statistics
|
||||
/** @brief Statistic on the number of stolen ranges in
|
||||
load-balanced quicksort.*/
|
||||
static volatile sequence_index_t qsb_steals;
|
||||
explicit
|
||||
_Settings() : algorithm_strategy(heuristic), sort_algorithm(MWMS), partial_sum_algorithm(LINEAR), multiway_merge_algorithm(LOSER_TREE), find_algorithm(CONSTANT_SIZE_BLOCKS), sort_splitting(EXACT), merge_splitting(EXACT), multiway_merge_splitting(EXACT), accumulate_minimal_n(1000), adjacent_difference_minimal_n(1000), count_minimal_n(1000), fill_minimal_n(1000), find_increasing_factor(2.0), find_initial_block_size(256), find_maximum_block_size(8192), find_sequential_search_size(256), for_each_minimal_n(1000), generate_minimal_n(1000), max_element_minimal_n(1000), merge_minimal_n(1000), merge_oversampling(10), min_element_minimal_n(1000), multiway_merge_minimal_n(1000), multiway_merge_minimal_k(2), multiway_merge_oversampling(10), nth_element_minimal_n(1000), partition_chunk_size(1000), partition_chunk_share(0.0), partition_minimal_n(1000), partial_sort_minimal_n(1000), partial_sum_dilation(1.0f), partial_sum_minimal_n(1000), random_shuffle_minimal_n(1000), replace_minimal_n(1000), set_difference_minimal_n(1000), set_intersection_minimal_n(1000), set_symmetric_difference_minimal_n(1000), set_union_minimal_n(1000), sort_minimal_n(1000), sort_mwms_oversampling(10), sort_qs_num_samples_preset(100), sort_qsb_base_case_maximal_n(100), transform_minimal_n(1000), unique_copy_minimal_n(10000), workstealing_chunk_size(100), L1_cache_size(16 << 10), L2_cache_size(256 << 10), TLB_size(128), cache_line_size(64), qsb_steals(0)
|
||||
{ }
|
||||
};
|
||||
|
||||
volatile bool Settings::force_parallel = false;
|
||||
volatile bool Settings::force_sequential = false;
|
||||
volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS;
|
||||
volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT;
|
||||
volatile sequence_index_t Settings::sort_minimal_n = 1000;
|
||||
|
||||
volatile unsigned int Settings::sort_mwms_oversampling = 10;
|
||||
volatile unsigned int Settings::sort_qs_num_samples_preset = 100;
|
||||
volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100;
|
||||
volatile sequence_index_t Settings::partition_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::nth_element_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::partial_sort_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::partition_chunk_size = 1000;
|
||||
volatile double Settings::partition_chunk_share = 0.0;
|
||||
volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
|
||||
volatile Settings::PartialSumAlgorithm Settings::
|
||||
partial_sum_algorithm = Settings::LINEAR;
|
||||
volatile unsigned int Settings::partial_sum_minimal_n = 1000;
|
||||
volatile float Settings::partial_sum_dilatation = 1.0f;
|
||||
volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
|
||||
volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT;
|
||||
volatile sequence_index_t Settings::merge_minimal_n = 1000;
|
||||
volatile unsigned int Settings::merge_oversampling = 10;
|
||||
volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000;
|
||||
volatile int Settings::multiway_merge_minimal_k = 2;
|
||||
|
||||
// unique copy
|
||||
volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
|
||||
volatile Settings::MultiwayMergeAlgorithm Settings::
|
||||
multiway_merge_algorithm = Settings::LOSER_TREE;
|
||||
volatile Settings::Splitting Settings::multiway_merge_splitting =
|
||||
Settings::EXACT;
|
||||
volatile unsigned int Settings::multiway_merge_oversampling = 10;
|
||||
volatile Settings::FindDistribution Settings::find_distribution =
|
||||
Settings::CONSTANT_SIZE_BLOCKS;
|
||||
volatile sequence_index_t Settings::find_sequential_search_size = 256;
|
||||
volatile sequence_index_t Settings::find_initial_block_size = 256;
|
||||
volatile sequence_index_t Settings::find_maximum_block_size = 8192;
|
||||
volatile double Settings::find_increasing_factor = 2.0;
|
||||
volatile sequence_index_t Settings::workstealing_chunk_size = 100;
|
||||
volatile sequence_index_t Settings::for_each_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::count_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::transform_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::replace_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::generate_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::fill_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::min_element_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::max_element_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::accumulate_minimal_n = 1000;
|
||||
|
||||
//set operations
|
||||
volatile sequence_index_t Settings::set_union_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
|
||||
volatile sequence_index_t Settings::set_symmetric_difference_minimal_n =
|
||||
1000;
|
||||
volatile unsigned long long Settings::L1_cache_size = 16 << 10;
|
||||
volatile unsigned long long Settings::L2_cache_size = 256 << 10;
|
||||
volatile unsigned int Settings::TLB_size = 128;
|
||||
unsigned int Settings::cache_line_size = 64;
|
||||
|
||||
//statistics
|
||||
volatile sequence_index_t Settings::qsb_steals = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_SETTINGS_H */
|
||||
|
|
|
|||
|
|
@ -84,16 +84,15 @@ namespace __gnu_parallel
|
|||
|
||||
if (false) ;
|
||||
#if _GLIBCXX_MERGESORT
|
||||
else if (Settings::sort_algorithm == Settings::MWMS || stable)
|
||||
else if (stable || _Settings::get().sort_algorithm == MWMS)
|
||||
parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
|
||||
#endif
|
||||
#if _GLIBCXX_QUICKSORT
|
||||
else if (Settings::sort_algorithm == Settings::QS && !stable)
|
||||
else if (!stable && _Settings::get().sort_algorithm == QS)
|
||||
parallel_sort_qs(begin, end, comp, n, get_max_threads());
|
||||
#endif
|
||||
#if _GLIBCXX_BAL_QUICKSORT
|
||||
else if (Settings::sort_algorithm == Settings::QS_BALANCED
|
||||
&& !stable)
|
||||
else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
|
||||
parallel_sort_qsb(begin, end, comp, n, get_max_threads());
|
||||
#endif
|
||||
else
|
||||
|
|
|
|||
|
|
@ -64,18 +64,19 @@ namespace __gnu_parallel
|
|||
struct omp_loop_static_tag : public parallel_tag { };
|
||||
|
||||
|
||||
// XXX settings.h Settings::FindDistribution
|
||||
struct find_tag { };
|
||||
|
||||
/** @brief Selects the growing block size variant for std::find().
|
||||
@see _GLIBCXX_FIND_GROWING_BLOCKS */
|
||||
struct growing_blocks_tag { };
|
||||
struct growing_blocks_tag : public find_tag { };
|
||||
|
||||
/** @brief Selects the constant block size variant for std::find().
|
||||
@see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */
|
||||
struct constant_size_blocks_tag { };
|
||||
struct constant_size_blocks_tag : public find_tag { };
|
||||
|
||||
/** @brief Selects the equal splitting variant for std::find().
|
||||
@see _GLIBCXX_FIND_EQUAL_SPLIT */
|
||||
struct equal_split_tag { };
|
||||
struct equal_split_tag : public find_tag { };
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_TAGS_H */
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@ namespace __gnu_parallel
|
|||
{
|
||||
// Enumerated types.
|
||||
|
||||
/// @brief Run-time equivalents for the compile-time tags.
|
||||
enum parallelism
|
||||
/// Run-time equivalents for the compile-time tags.
|
||||
enum _Parallelism
|
||||
{
|
||||
/// Not parallel.
|
||||
sequential,
|
||||
|
|
@ -66,9 +66,60 @@ namespace __gnu_parallel
|
|||
parallel_taskqueue
|
||||
};
|
||||
|
||||
inline bool
|
||||
is_parallel(const parallelism __p) { return __p != sequential; }
|
||||
/// Strategies for run-time algorithm selection:
|
||||
// heuristic, force_sequential, force_parallel.
|
||||
enum _AlgorithmStrategy
|
||||
{
|
||||
heuristic,
|
||||
force_sequential,
|
||||
force_parallel
|
||||
};
|
||||
|
||||
/// Sorting algorithms:
|
||||
// multi-way mergesort, quicksort, load-balanced quicksort.
|
||||
enum _SortAlgorithm
|
||||
{
|
||||
MWMS,
|
||||
QS,
|
||||
QS_BALANCED
|
||||
};
|
||||
|
||||
/// Merging algorithms:
|
||||
// bubblesort-alike, loser-tree variants, enum sentinel.
|
||||
enum _MultiwayMergeAlgorithm
|
||||
{
|
||||
BUBBLE,
|
||||
LOSER_TREE_EXPLICIT,
|
||||
LOSER_TREE,
|
||||
LOSER_TREE_COMBINED,
|
||||
LOSER_TREE_SENTINEL,
|
||||
ENUM_SENTINEL
|
||||
};
|
||||
|
||||
/// Partial sum algorithms: recursive, linear.
|
||||
enum _PartialSumAlgorithm
|
||||
{
|
||||
RECURSIVE,
|
||||
LINEAR
|
||||
};
|
||||
|
||||
/// Splitting strategies for sorting/merging: sampling, exact.
|
||||
enum _SplittingAlgorithm
|
||||
{
|
||||
SAMPLING,
|
||||
EXACT
|
||||
};
|
||||
|
||||
/// Find algorithms:
|
||||
// growing blocks, equal-sized blocks, equal splitting.
|
||||
enum _FindAlgorithm
|
||||
{
|
||||
GROWING_BLOCKS,
|
||||
CONSTANT_SIZE_BLOCKS,
|
||||
EQUAL_SPLIT
|
||||
};
|
||||
|
||||
/// Integer Types.
|
||||
// XXX need to use <cstdint>
|
||||
/** @brief 16-bit signed integer. */
|
||||
typedef short int16;
|
||||
|
|
@ -101,20 +152,14 @@ namespace __gnu_parallel
|
|||
typedef uint16 thread_index_t;
|
||||
|
||||
// XXX atomics interface?
|
||||
/**
|
||||
* @brief Longest compare-and-swappable integer type on this platform.
|
||||
*/
|
||||
/// Longest compare-and-swappable integer type on this platform.
|
||||
typedef int64 lcas_t;
|
||||
|
||||
// XXX numeric_limits::digits?
|
||||
/**
|
||||
* @brief Number of bits of ::lcas_t.
|
||||
*/
|
||||
/// Number of bits of ::lcas_t.
|
||||
static const int lcas_t_bits = sizeof(lcas_t) * 8;
|
||||
|
||||
/**
|
||||
* @brief ::lcas_t with the right half of bits set to 1.
|
||||
*/
|
||||
/// ::lcas_t with the right half of bits set to 1.
|
||||
static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -114,17 +114,16 @@ template<typename RandomAccessIterator,
|
|||
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
|
||||
difference_type chunk_size =
|
||||
static_cast<difference_type>(Settings::workstealing_chunk_size);
|
||||
difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);
|
||||
|
||||
// How many jobs?
|
||||
difference_type length = (bound < 0) ? (end - begin) : bound;
|
||||
|
||||
// To avoid false sharing in a cache line.
|
||||
const int stride =
|
||||
Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
|
||||
const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
|
||||
|
||||
// Total number of threads currently working.
|
||||
thread_index_t busy = 0;
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ basic_file.cc: ${glibcxx_srcdir}/$(BASIC_FILE_CC)
|
|||
$(LN_S) ${glibcxx_srcdir}/$(BASIC_FILE_CC) ./$@ || true
|
||||
|
||||
if ENABLE_PARALLEL
|
||||
parallel_sources = parallel_list.cc
|
||||
parallel_sources = parallel_list.cc parallel_settings.cc
|
||||
else
|
||||
parallel_sources =
|
||||
endif
|
||||
|
|
@ -221,6 +221,11 @@ parallel_list.lo: parallel_list.cc
|
|||
parallel_list.o: parallel_list.cc
|
||||
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
|
||||
parallel_settings.lo: parallel_settings.cc
|
||||
$(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
parallel_settings.o: parallel_settings.cc
|
||||
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
|
||||
# Use special rules for the C++0x sources so that the proper flags are passed.
|
||||
system_error.lo: system_error.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++0x -c $<
|
||||
|
|
|
|||
|
|
@ -84,12 +84,13 @@ am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \
|
|||
codecvt_members.cc collate_members.cc ctype_members.cc \
|
||||
messages_members.cc monetary_members.cc numeric_members.cc \
|
||||
time_members.cc basic_file.cc c++locale.cc \
|
||||
compatibility-ldbl.cc parallel_list.cc
|
||||
compatibility-ldbl.cc parallel_list.cc parallel_settings.cc
|
||||
am__objects_1 = atomicity.lo codecvt_members.lo collate_members.lo \
|
||||
ctype_members.lo messages_members.lo monetary_members.lo \
|
||||
numeric_members.lo time_members.lo
|
||||
@GLIBCXX_LDBL_COMPAT_TRUE@am__objects_2 = compatibility-ldbl.lo
|
||||
@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo
|
||||
@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo \
|
||||
@ENABLE_PARALLEL_TRUE@ parallel_settings.lo
|
||||
am__objects_4 = basic_file.lo c++locale.lo $(am__objects_2) \
|
||||
$(am__objects_3)
|
||||
am__objects_5 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \
|
||||
|
|
@ -359,7 +360,7 @@ host_sources_extra = \
|
|||
basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources}
|
||||
|
||||
@ENABLE_PARALLEL_FALSE@parallel_sources =
|
||||
@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc
|
||||
@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc parallel_settings.cc
|
||||
@GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources =
|
||||
@GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc
|
||||
|
||||
|
|
@ -810,6 +811,11 @@ parallel_list.lo: parallel_list.cc
|
|||
parallel_list.o: parallel_list.cc
|
||||
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
|
||||
parallel_settings.lo: parallel_settings.cc
|
||||
$(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
parallel_settings.o: parallel_settings.cc
|
||||
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
|
||||
|
||||
# Use special rules for the C++0x sources so that the proper flags are passed.
|
||||
system_error.lo: system_error.cc
|
||||
$(LTCXXCOMPILE) -std=gnu++0x -c $<
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
// Default settings for parallel mode -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 2, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING. If not, write to the Free
|
||||
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
// USA.
|
||||
|
||||
// As a special exception, you may use this file as part of a free software
|
||||
// library without restriction. Specifically, if other files instantiate
|
||||
// templates or use macros or inline functions from this file, or you compile
|
||||
// this file and link it with other files to produce an executable, this
|
||||
// file does not by itself cause the resulting executable to be covered by
|
||||
// the GNU General Public License. This exception does not however
|
||||
// invalidate any other reasons why the executable file might be covered by
|
||||
// the GNU General Public License.
|
||||
|
||||
#include <parallel/settings.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
__gnu_parallel::_Settings s;
|
||||
}
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
const _Settings&
|
||||
_Settings::get() throw()
|
||||
{ return s; }
|
||||
|
||||
// XXX MT: no synchronization here; concurrent get()/set() calls race on s.
|
||||
void
|
||||
_Settings::set(_Settings& obj) throw()
|
||||
{ s = obj; }
|
||||
}
|
||||
Loading…
Reference in New Issue