sched/fair: Fix zero_vruntime tracking (b3d99f43) · Commits · git / linux-net

kernel/sched/fair.c

+57 −27

Original line number	Diff line number	Diff line
		@@ -589,6 +589,21 @@ static inline bool entity_before(const struct sched_entity *a,
		return vruntime_cmp(a->deadline, "<", b->deadline);
		}

		/*
		* Per avg_vruntime() below, cfs_rq::zero_vruntime is only slightly stale
		* and this value should be no more than two lag bounds. Which puts it in the
		* general order of:
		*
		* (slice + TICK_NSEC) << NICE_0_LOAD_SHIFT
		*
		* which is around 44 bits in size (on 64bit); that is 20 for
		* NICE_0_LOAD_SHIFT, another 20 for NSEC_PER_MSEC and then a handful for
		* however many msec the actual slice+tick ends up begin.
		*
		* (disregarding the actual divide-by-weight part makes for the worst case
		* weight of 2, which nicely cancels vs the fuzz in zero_vruntime not actually
		* being the zero-lag point).
		*/
		static inline s64 entity_key(struct cfs_rq cfs_rq, struct sched_entity se)
		{
		return vruntime_op(se->vruntime, "-", cfs_rq->zero_vruntime);
		@@ -676,39 +691,61 @@ sum_w_vruntime_sub(struct cfs_rq cfs_rq, struct sched_entity se)
		}

		static inline
		void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
		void update_zero_vruntime(struct cfs_rq *cfs_rq, s64 delta)
		{
		/*
		* v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight
		* v' = v + d ==> sum_w_vruntime' = sum_w_vruntime - d*sum_weight
		*/
		cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta;
		cfs_rq->zero_vruntime += delta;
		}

		/*
		* Specifically: avg_runtime() + 0 must result in entity_eligible() := true
		* Specifically: avg_vruntime() + 0 must result in entity_eligible() := true
		* For this to be so, the result of this function must have a left bias.
		*
		* Called in:
		* - place_entity() -- before enqueue
		* - update_entity_lag() -- before dequeue
		* - entity_tick()
		*
		* This means it is one entry 'behind' but that puts it close enough to where
		* the bound on entity_key() is at most two lag bounds.
		*/
		u64 avg_vruntime(struct cfs_rq *cfs_rq)
		{
		struct sched_entity *curr = cfs_rq->curr;
		s64 avg = cfs_rq->sum_w_vruntime;
		long load = cfs_rq->sum_weight;
		long weight = cfs_rq->sum_weight;
		s64 delta = 0;

		if (curr && curr->on_rq) {
		unsigned long weight = scale_load_down(curr->load.weight);
		if (curr && !curr->on_rq)
		curr = NULL;

		avg += entity_key(cfs_rq, curr) * weight;
		load += weight;
		if (weight) {
		s64 runtime = cfs_rq->sum_w_vruntime;

		if (curr) {
		unsigned long w = scale_load_down(curr->load.weight);

		runtime += entity_key(cfs_rq, curr) * w;
		weight += w;
		}

		if (load) {
		/* sign flips effective floor / ceiling */
		if (avg < 0)
		avg -= (load - 1);
		avg = div_s64(avg, load);
		if (runtime < 0)
		runtime -= (weight - 1);

		delta = div_s64(runtime, weight);
		} else if (curr) {
		/*
		* When there is but one element, it is the average.
		*/
		delta = curr->vruntime - cfs_rq->zero_vruntime;
		}

		return cfs_rq->zero_vruntime + avg;
		update_zero_vruntime(cfs_rq, delta);

		return cfs_rq->zero_vruntime;
		}

		/*
		@@ -777,16 +814,6 @@ int entity_eligible(struct cfs_rq cfs_rq, struct sched_entity se)
		return vruntime_eligible(cfs_rq, se->vruntime);
		}

		static void update_zero_vruntime(struct cfs_rq *cfs_rq)
		{
		u64 vruntime = avg_vruntime(cfs_rq);
		s64 delta = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime);

		sum_w_vruntime_update(cfs_rq, delta);

		cfs_rq->zero_vruntime = vruntime;
		}

		static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
		{
		struct sched_entity *root = __pick_root_entity(cfs_rq);
		@@ -856,7 +883,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
		static void __enqueue_entity(struct cfs_rq cfs_rq, struct sched_entity se)
		{
		sum_w_vruntime_add(cfs_rq, se);
		update_zero_vruntime(cfs_rq);
		se->min_vruntime = se->vruntime;
		se->min_slice = se->slice;
		rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
		@@ -868,7 +894,6 @@ static void __dequeue_entity(struct cfs_rq cfs_rq, struct sched_entity se)
		rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
		&min_vruntime_cb);
		sum_w_vruntime_sub(cfs_rq, se);
		update_zero_vruntime(cfs_rq);
		}

		struct sched_entity __pick_root_entity(struct cfs_rq cfs_rq)
		@@ -5524,6 +5549,11 @@ entity_tick(struct cfs_rq cfs_rq, struct sched_entity curr, int queued)
		update_load_avg(cfs_rq, curr, UPDATE_TG);
		update_cfs_group(curr);

		/*
		* Pulls along cfs_rq::zero_vruntime.
		*/
		avg_vruntime(cfs_rq);

		#ifdef CONFIG_SCHED_HRTICK
		/*
		* queued ticks are scheduled to match the slice, so don't bother