mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			380 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			380 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
| /* Test and benchmark of a couple of parallel sorting algorithms.
 | |
|    Copyright (C) 2008-2016 Free Software Foundation, Inc.
 | |
| 
 | |
|    GCC is free software; you can redistribute it and/or modify it under
 | |
|    the terms of the GNU General Public License as published by the Free
 | |
|    Software Foundation; either version 3, or (at your option) any later
 | |
|    version.
 | |
| 
 | |
|    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 | |
|    WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | |
|    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 | |
|    for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with GCC; see the file COPYING3.  If not see
 | |
|    <http://www.gnu.org/licenses/>.  */
 | |
| 
 | |
| #include <limits.h>
 | |
| #include <omp.h>
 | |
| #include <stdbool.h>
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| 
 | |
| int failures;
 | |
| 
 | |
| #define THRESHOLD 100
 | |
| 
 | |
| static void
 | |
| verify (const char *name, double stime, int *array, int count)
 | |
| {
 | |
|   int i;
 | |
|   double etime = omp_get_wtime ();
 | |
| 
 | |
|   printf ("%s: %g\n", name, etime - stime);
 | |
|   for (i = 1; i < count; i++)
 | |
|     if (array[i] < array[i - 1])
 | |
|       {
 | |
| 	printf ("%s: incorrectly sorted\n", name);
 | |
| 	failures = 1;
 | |
|       }
 | |
| }
 | |
| 
 | |
| static void
 | |
| insertsort (int *array, int s, int e)
 | |
| {
 | |
|   int i, j, val;
 | |
|   for (i = s + 1; i <= e; i++)
 | |
|     {
 | |
|       val = array[i];
 | |
|       j = i;
 | |
|       while (j-- > s && val < array[j])
 | |
| 	array[j + 1] = array[j];
 | |
|       array[j + 1] = val;
 | |
|     }
 | |
| }
 | |
| 
 | |
| struct int_pair
 | |
| {
 | |
|   int lo;
 | |
|   int hi;
 | |
| };
 | |
| 
 | |
| struct int_pair_stack
 | |
| {
 | |
|   struct int_pair *top;
 | |
| #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
 | |
|   struct int_pair arr[STACK_SIZE];
 | |
| };
 | |
| 
 | |
| static inline void
 | |
| init_int_pair_stack (struct int_pair_stack *stack)
 | |
| {
 | |
|   stack->top = &stack->arr[0];
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
 | |
| {
 | |
|   stack->top->lo = lo;
 | |
|   stack->top->hi = hi;
 | |
|   stack->top++;
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
 | |
| {
 | |
|   stack->top--;
 | |
|   *lo = stack->top->lo;
 | |
|   *hi = stack->top->hi;
 | |
| }
 | |
| 
 | |
| static inline int
 | |
| size_int_pair_stack (struct int_pair_stack *stack)
 | |
| {
 | |
|   return stack->top - &stack->arr[0];
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| busy_wait (void)
 | |
| {
 | |
| #if defined __i386__ || defined __x86_64__
 | |
|   __builtin_ia32_pause ();
 | |
| #elif defined __ia64__
 | |
|   __asm volatile ("hint @pause" : : : "memory");
 | |
| #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
 | |
|   __asm volatile ("membar #LoadLoad" : : : "memory");
 | |
| #else
 | |
|   __asm volatile ("" : : : "memory");
 | |
| #endif
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| swap (int *array, int a, int b)
 | |
| {
 | |
|   int val = array[a];
 | |
|   array[a] = array[b];
 | |
|   array[b] = val;
 | |
| }
 | |
| 
 | |
| static inline int
 | |
| choose_pivot (int *array, int lo, int hi)
 | |
| {
 | |
|   int mid = (lo + hi) / 2;
 | |
| 
 | |
|   if (array[mid] < array[lo])
 | |
|     swap (array, lo, mid);
 | |
|   if (array[hi] < array[mid])
 | |
|     {
 | |
|       swap (array, mid, hi);
 | |
|       if (array[mid] < array[lo])
 | |
| 	swap (array, lo, mid);
 | |
|     }
 | |
|   return array[mid];
 | |
| }
 | |
| 
 | |
| static inline int
 | |
| partition (int *array, int lo, int hi)
 | |
| {
 | |
|   int pivot = choose_pivot (array, lo, hi);
 | |
|   int left = lo;
 | |
|   int right = hi;
 | |
| 
 | |
|   for (;;)
 | |
|     {
 | |
|       while (array[++left] < pivot);
 | |
|       while (array[--right] > pivot);
 | |
|       if (left >= right)
 | |
| 	break;
 | |
|       swap (array, left, right);
 | |
|     }
 | |
|   return left;
 | |
| }
 | |
| 
 | |
| static void
 | |
| sort1 (int *array, int count)
 | |
| {
 | |
|   omp_lock_t lock;
 | |
|   struct int_pair_stack global_stack;
 | |
|   int busy = 1;
 | |
|   int num_threads;
 | |
| 
 | |
|   omp_init_lock (&lock);
 | |
|   init_int_pair_stack (&global_stack);
 | |
|   #pragma omp parallel firstprivate (array, count)
 | |
|   {
 | |
|     int lo = 0, hi = 0, mid, next_lo, next_hi;
 | |
|     bool idle = true;
 | |
|     struct int_pair_stack local_stack;
 | |
| 
 | |
|     init_int_pair_stack (&local_stack);
 | |
|     if (omp_get_thread_num () == 0)
 | |
|       {
 | |
| 	num_threads = omp_get_num_threads ();
 | |
| 	hi = count - 1;
 | |
| 	idle = false;
 | |
|       }
 | |
| 
 | |
|     for (;;)
 | |
|       {
 | |
| 	if (hi - lo < THRESHOLD)
 | |
| 	  {
 | |
| 	    insertsort (array, lo, hi);
 | |
| 	    lo = hi;
 | |
| 	  }
 | |
| 	if (lo >= hi)
 | |
| 	  {
 | |
| 	    if (size_int_pair_stack (&local_stack) == 0)
 | |
| 	      {
 | |
| 	      again:
 | |
| 		omp_set_lock (&lock);
 | |
| 		if (size_int_pair_stack (&global_stack) == 0)
 | |
| 		  {
 | |
| 		    if (!idle)
 | |
| 		      busy--;
 | |
| 		    if (busy == 0)
 | |
| 		      {
 | |
| 			omp_unset_lock (&lock);
 | |
| 			break;
 | |
| 		      }
 | |
| 		    omp_unset_lock (&lock);
 | |
| 		    idle = true;
 | |
| 		    while (size_int_pair_stack (&global_stack) == 0
 | |
| 			   && busy)
 | |
| 		      busy_wait ();
 | |
| 		    goto again;
 | |
| 		  }
 | |
| 		if (idle)
 | |
| 		  busy++;
 | |
| 		pop_int_pair_stack (&global_stack, &lo, &hi);
 | |
| 		omp_unset_lock (&lock);
 | |
| 		idle = false;
 | |
| 	      }
 | |
| 	    else
 | |
| 	      pop_int_pair_stack (&local_stack, &lo, &hi);
 | |
| 	  }
 | |
| 
 | |
| 	mid = partition (array, lo, hi);
 | |
| 	if (mid - lo < hi - mid)
 | |
| 	  {
 | |
| 	    next_lo = mid;
 | |
| 	    next_hi = hi;
 | |
| 	    hi = mid - 1;
 | |
| 	  }
 | |
| 	else
 | |
| 	  {
 | |
| 	    next_lo = lo;
 | |
| 	    next_hi = mid - 1;
 | |
| 	    lo = mid;
 | |
| 	  }
 | |
| 
 | |
| 	if (next_hi - next_lo < THRESHOLD)
 | |
| 	  insertsort (array, next_lo, next_hi);
 | |
| 	else
 | |
| 	  {
 | |
| 	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
 | |
| 	      {
 | |
| 		int size;
 | |
| 
 | |
| 		omp_set_lock (&lock);
 | |
| 		size = size_int_pair_stack (&global_stack);
 | |
| 		if (size < num_threads - 1 && size < STACK_SIZE)
 | |
| 		  push_int_pair_stack (&global_stack, next_lo, next_hi);
 | |
| 		else
 | |
| 		  push_int_pair_stack (&local_stack, next_lo, next_hi);
 | |
| 		omp_unset_lock (&lock);
 | |
| 	      }
 | |
| 	    else
 | |
| 	      push_int_pair_stack (&local_stack, next_lo, next_hi);
 | |
| 	  }
 | |
|       }
 | |
|     }
 | |
|   omp_destroy_lock (&lock);
 | |
| }
 | |
| 
 | |
| static void
 | |
| sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
 | |
| {
 | |
|   int mid;
 | |
| 
 | |
|   if (hi - lo < THRESHOLD)
 | |
|     {
 | |
|       insertsort (array, lo, hi);
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|   mid = partition (array, lo, hi);
 | |
| 
 | |
|   if (*busy >= num_threads)
 | |
|     {
 | |
|       sort2_1 (array, lo, mid - 1, num_threads, busy);
 | |
|       sort2_1 (array, mid, hi, num_threads, busy);
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|   #pragma omp atomic
 | |
|     *busy += 1;
 | |
| 
 | |
|   #pragma omp parallel num_threads (2) \
 | |
| 		       firstprivate (array, lo, hi, mid, num_threads, busy)
 | |
|   {
 | |
|     if (omp_get_thread_num () == 0)
 | |
|       sort2_1 (array, lo, mid - 1, num_threads, busy);
 | |
|     else
 | |
|       {
 | |
| 	sort2_1 (array, mid, hi, num_threads, busy);
 | |
| 	#pragma omp atomic
 | |
| 	  *busy -= 1;
 | |
|       }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void
 | |
| sort2 (int *array, int count)
 | |
| {
 | |
|   int num_threads;
 | |
|   int busy = 1;
 | |
| 
 | |
|   #pragma omp parallel
 | |
|     #pragma omp single nowait
 | |
|       num_threads = omp_get_num_threads ();
 | |
| 
 | |
|   sort2_1 (array, 0, count - 1, num_threads, &busy);
 | |
| }
 | |
| 
 | |
| #if _OPENMP >= 200805
 | |
| static void
 | |
| sort3_1 (int *array, int lo, int hi)
 | |
| {
 | |
|   int mid;
 | |
| 
 | |
|   if (hi - lo < THRESHOLD)
 | |
|     {
 | |
|       insertsort (array, lo, hi);
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|   mid = partition (array, lo, hi);
 | |
|   #pragma omp task
 | |
|     sort3_1 (array, lo, mid - 1);
 | |
|   sort3_1 (array, mid, hi);
 | |
| }
 | |
| 
 | |
| static void
 | |
| sort3 (int *array, int count)
 | |
| {
 | |
|   #pragma omp parallel
 | |
|     #pragma omp single
 | |
|       sort3_1 (array, 0, count - 1);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| int
 | |
| main (int argc, char **argv)
 | |
| {
 | |
|   int i, count = 1000000;
 | |
|   double stime;
 | |
|   int *unsorted, *sorted, num_threads;
 | |
|   if (argc >= 2)
 | |
|     count = strtoul (argv[1], NULL, 0);
 | |
| 
 | |
|   unsorted = malloc (count * sizeof (int));
 | |
|   sorted = malloc (count * sizeof (int));
 | |
|   if (unsorted == NULL || sorted == NULL)
 | |
|     {
 | |
|       puts ("allocation failure");
 | |
|       exit (1);
 | |
|     }
 | |
| 
 | |
|   srand (0xdeadbeef);
 | |
|   for (i = 0; i < count; i++)
 | |
|     unsorted[i] = rand ();
 | |
| 
 | |
|   omp_set_nested (1);
 | |
|   omp_set_dynamic (0);
 | |
|   #pragma omp parallel
 | |
|     #pragma omp single nowait
 | |
|       num_threads = omp_get_num_threads ();
 | |
|   printf ("Threads: %d\n", num_threads);
 | |
| 
 | |
|   memcpy (sorted, unsorted, count * sizeof (int));
 | |
|   stime = omp_get_wtime ();
 | |
|   sort1 (sorted, count);
 | |
|   verify ("sort1", stime, sorted, count);
 | |
| 
 | |
|   memcpy (sorted, unsorted, count * sizeof (int));
 | |
|   stime = omp_get_wtime ();
 | |
|   sort2 (sorted, count);
 | |
|   verify ("sort2", stime, sorted, count);
 | |
| 
 | |
| #if _OPENMP >= 200805
 | |
|   memcpy (sorted, unsorted, count * sizeof (int));
 | |
|   stime = omp_get_wtime ();
 | |
|   sort3 (sorted, count);
 | |
|   verify ("sort3", stime, sorted, count);
 | |
| #endif
 | |
| 
 | |
|   return 0;
 | |
| }
 |