mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			200 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C
		
	
	
	
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Parallel for algorithm.
 | 
						|
 | 
						|
#include "runtime.h"
 | 
						|
#include "arch.h"
 | 
						|
 | 
						|
struct ParForThread
 | 
						|
{
 | 
						|
	// the thread's iteration space [32lsb, 32msb)
 | 
						|
	uint64 pos;
 | 
						|
	// stats
 | 
						|
	uint64 nsteal;
 | 
						|
	uint64 nstealcnt;
 | 
						|
	uint64 nprocyield;
 | 
						|
	uint64 nosyield;
 | 
						|
	uint64 nsleep;
 | 
						|
	byte pad[CacheLineSize];
 | 
						|
};
 | 
						|
 | 
						|
ParFor*
 | 
						|
runtime_parforalloc(uint32 nthrmax)
 | 
						|
{
 | 
						|
	ParFor *desc;
 | 
						|
 | 
						|
	// The ParFor object is followed by CacheLineSize padding
 | 
						|
	// and then nthrmax ParForThread.
 | 
						|
	desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
 | 
						|
	desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
 | 
						|
	desc->nthrmax = nthrmax;
 | 
						|
	return desc;
 | 
						|
}
 | 
						|
 | 
						|
void
 | 
						|
runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
 | 
						|
{
 | 
						|
	uint32 i, begin, end;
 | 
						|
	uint64 *pos;
 | 
						|
 | 
						|
	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
 | 
						|
		runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
 | 
						|
		runtime_throw("parfor: invalid args");
 | 
						|
	}
 | 
						|
 | 
						|
	desc->body = body;
 | 
						|
	desc->done = 0;
 | 
						|
	desc->nthr = nthr;
 | 
						|
	desc->thrseq = 0;
 | 
						|
	desc->cnt = n;
 | 
						|
	desc->ctx = ctx;
 | 
						|
	desc->wait = wait;
 | 
						|
	desc->nsteal = 0;
 | 
						|
	desc->nstealcnt = 0;
 | 
						|
	desc->nprocyield = 0;
 | 
						|
	desc->nosyield = 0;
 | 
						|
	desc->nsleep = 0;
 | 
						|
	for(i=0; i<nthr; i++) {
 | 
						|
		begin = (uint64)n*i / nthr;
 | 
						|
		end = (uint64)n*(i+1) / nthr;
 | 
						|
		pos = &desc->thr[i].pos;
 | 
						|
		if(((uintptr)pos & 7) != 0)
 | 
						|
			runtime_throw("parforsetup: pos is not aligned");
 | 
						|
		*pos = (uint64)begin | (((uint64)end)<<32);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
void
 | 
						|
runtime_parfordo(ParFor *desc)
 | 
						|
{
 | 
						|
	ParForThread *me;
 | 
						|
	uint32 tid, begin, end, begin2, try, victim, i;
 | 
						|
	uint64 *mypos, *victimpos, pos, newpos;
 | 
						|
	void (*body)(ParFor*, uint32);
 | 
						|
	bool idle;
 | 
						|
 | 
						|
	// Obtain 0-based thread index.
 | 
						|
	tid = runtime_xadd(&desc->thrseq, 1) - 1;
 | 
						|
	if(tid >= desc->nthr) {
 | 
						|
		runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
 | 
						|
		runtime_throw("parfor: invalid tid");
 | 
						|
	}
 | 
						|
 | 
						|
	// If single-threaded, just execute the for serially.
 | 
						|
	if(desc->nthr==1) {
 | 
						|
		for(i=0; i<desc->cnt; i++)
 | 
						|
			desc->body(desc, i);
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	body = desc->body;
 | 
						|
	me = &desc->thr[tid];
 | 
						|
	mypos = &me->pos;
 | 
						|
	for(;;) {
 | 
						|
		for(;;) {
 | 
						|
			// While there is local work,
 | 
						|
			// bump low index and execute the iteration.
 | 
						|
			pos = runtime_xadd64(mypos, 1);
 | 
						|
			begin = (uint32)pos-1;
 | 
						|
			end = (uint32)(pos>>32);
 | 
						|
			if(begin < end) {
 | 
						|
				body(desc, begin);
 | 
						|
				continue;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		}
 | 
						|
 | 
						|
		// Out of work, need to steal something.
 | 
						|
		idle = false;
 | 
						|
		for(try=0;; try++) {
 | 
						|
			// If we don't see any work for long enough,
 | 
						|
			// increment the done counter...
 | 
						|
			if(try > desc->nthr*4 && !idle) {
 | 
						|
				idle = true;
 | 
						|
				runtime_xadd(&desc->done, 1);
 | 
						|
			}
 | 
						|
			// ...if all threads have incremented the counter,
 | 
						|
			// we are done.
 | 
						|
			if(desc->done + !idle == desc->nthr) {
 | 
						|
				if(!idle)
 | 
						|
					runtime_xadd(&desc->done, 1);
 | 
						|
				goto exit;
 | 
						|
			}
 | 
						|
			// Choose a random victim for stealing.
 | 
						|
			victim = runtime_fastrand1() % (desc->nthr-1);
 | 
						|
			if(victim >= tid)
 | 
						|
				victim++;
 | 
						|
			victimpos = &desc->thr[victim].pos;
 | 
						|
			for(;;) {
 | 
						|
				// See if it has any work.
 | 
						|
				pos = runtime_atomicload64(victimpos);
 | 
						|
				begin = (uint32)pos;
 | 
						|
				end = (uint32)(pos>>32);
 | 
						|
				if(begin+1 >= end) {
 | 
						|
					begin = end = 0;
 | 
						|
					break;
 | 
						|
				}
 | 
						|
				if(idle) {
 | 
						|
					runtime_xadd(&desc->done, -1);
 | 
						|
					idle = false;
 | 
						|
				}
 | 
						|
				begin2 = begin + (end-begin)/2;
 | 
						|
				newpos = (uint64)begin | (uint64)begin2<<32;
 | 
						|
				if(runtime_cas64(victimpos, pos, newpos)) {
 | 
						|
					begin = begin2;
 | 
						|
					break;
 | 
						|
				}
 | 
						|
			}
 | 
						|
			if(begin < end) {
 | 
						|
				// Has successfully stolen some work.
 | 
						|
				if(idle)
 | 
						|
					runtime_throw("parfor: should not be idle");
 | 
						|
				runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
 | 
						|
				me->nsteal++;
 | 
						|
				me->nstealcnt += end-begin;
 | 
						|
				break;
 | 
						|
			}
 | 
						|
			// Backoff.
 | 
						|
			if(try < desc->nthr) {
 | 
						|
				// nothing
 | 
						|
			} else if (try < 4*desc->nthr) {
 | 
						|
				me->nprocyield++;
 | 
						|
				runtime_procyield(20);
 | 
						|
			// If a caller asked not to wait for the others, exit now
 | 
						|
			// (assume that most work is already done at this point).
 | 
						|
			} else if (!desc->wait) {
 | 
						|
				if(!idle)
 | 
						|
					runtime_xadd(&desc->done, 1);
 | 
						|
				goto exit;
 | 
						|
			} else if (try < 6*desc->nthr) {
 | 
						|
				me->nosyield++;
 | 
						|
				runtime_osyield();
 | 
						|
			} else {
 | 
						|
				me->nsleep++;
 | 
						|
				runtime_usleep(1);
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
exit:
 | 
						|
	runtime_xadd64(&desc->nsteal, me->nsteal);
 | 
						|
	runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
 | 
						|
	runtime_xadd64(&desc->nprocyield, me->nprocyield);
 | 
						|
	runtime_xadd64(&desc->nosyield, me->nosyield);
 | 
						|
	runtime_xadd64(&desc->nsleep, me->nsleep);
 | 
						|
	me->nsteal = 0;
 | 
						|
	me->nstealcnt = 0;
 | 
						|
	me->nprocyield = 0;
 | 
						|
	me->nosyield = 0;
 | 
						|
	me->nsleep = 0;
 | 
						|
}
 | 
						|
 | 
						|
// For testing from Go.
 | 
						|
void
 | 
						|
runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
 | 
						|
{
 | 
						|
	*start = (uint32)desc->thr[tid].pos;
 | 
						|
	*end = (uint32)(desc->thr[tid].pos>>32);
 | 
						|
}
 |