Commit 36681f15, authored by Austin Zheng, committed by Alex Deucher
Browse files

drm/amd/display: Account For OTO Prefetch Bandwidth When Calculating Urgent Bandwidth



[Why]
1) The current calculations for OTO prefetch bandwidth do not consider the number of DPP pipes in use.
As a result, OTO prefetch bandwidth may be larger than the vactive bandwidth if multiple DPP pipes are used.
OTO prefetch bandwidth should never exceed the vactive bandwidth.

2) Mode programming may be mismatched with mode support.
In cases where mode support has chosen to use the equalized (equ) prefetch schedule,
mode programming may end up using the (oto) prefetch schedule instead.
The bandwidth required to do the oto schedule may end up being higher than the equ schedule.
This can cause the required urgent bandwidth to exceed the available urgent bandwidth.

[How]
Output the oto prefetch bandwidth and incorporate it into the urgent bandwidth calculations
even if the prefetch schedule being used is not the oto schedule.

Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Signed-off-by: Austin Zheng <Austin.Zheng@amd.com>
Signed-off-by: Zaeem Mohamed <zaeem.mohamed@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4a4077b4
Loading
Loading
Loading
Loading
+24 −1
Original line number Diff line number Diff line
@@ -4909,6 +4909,7 @@ static double get_urgent_bandwidth_required(
	double ReadBandwidthChroma[],
	double PrefetchBandwidthLuma[],
	double PrefetchBandwidthChroma[],
	double PrefetchBandwidthOto[],
	double excess_vactive_fill_bw_l[],
	double excess_vactive_fill_bw_c[],
	double cursor_bw[],
@@ -4972,8 +4973,9 @@ static double get_urgent_bandwidth_required(
			l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
			l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
			l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
			l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
			l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
			surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw);
			surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto);
			/* export peak required bandwidth for the surface */
			surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
@@ -5171,6 +5173,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
	s->Tsw_est3 = 0.0;
	s->cursor_prefetch_bytes = 0;
	*p->prefetch_cursor_bw = 0;
	*p->RequiredPrefetchBWOTO = 0.0;
	dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
@@ -5384,6 +5387,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
		s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
	}
	/* oto prefetch bw should always be less than total vactive bw */
	DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
	s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
	s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
@@ -5394,6 +5400,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
					p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
					(p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
	/* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
	 * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
	 * and the required bandwidth increases when going from ms to mp
	 */
	*p->RequiredPrefetchBWOTO = s->prefetch_bw_oto;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
	dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
@@ -6154,6 +6166,7 @@ static void calculate_peak_bandwidth_required(
				p->surface_read_bandwidth_c,
				l->zero_array, //PrefetchBandwidthLuma,
				l->zero_array, //PrefetchBandwidthChroma,
				l->zero_array, //PrefetchBWOTO
				l->zero_array,
				l->zero_array,
				l->zero_array,
@@ -6190,6 +6203,7 @@ static void calculate_peak_bandwidth_required(
				p->surface_read_bandwidth_c,
				l->zero_array, //PrefetchBandwidthLuma,
				l->zero_array, //PrefetchBandwidthChroma,
				l->zero_array, //PrefetchBWOTO
				p->excess_vactive_fill_bw_l,
				p->excess_vactive_fill_bw_c,
				p->cursor_bw,
@@ -6226,6 +6240,7 @@ static void calculate_peak_bandwidth_required(
				p->surface_read_bandwidth_c,
				p->prefetch_bandwidth_l,
				p->prefetch_bandwidth_c,
				p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
				p->excess_vactive_fill_bw_l,
				p->excess_vactive_fill_bw_c,
				p->cursor_bw,
@@ -6262,6 +6277,7 @@ static void calculate_peak_bandwidth_required(
				p->surface_read_bandwidth_c,
				p->prefetch_bandwidth_l,
				p->prefetch_bandwidth_c,
				p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
				p->excess_vactive_fill_bw_l,
				p->excess_vactive_fill_bw_c,
				p->cursor_bw,
@@ -6298,6 +6314,7 @@ static void calculate_peak_bandwidth_required(
				p->surface_read_bandwidth_c,
				p->prefetch_bandwidth_l,
				p->prefetch_bandwidth_c,
				p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
				p->excess_vactive_fill_bw_l,
				p->excess_vactive_fill_bw_c,
				p->cursor_bw,
@@ -9060,6 +9077,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
				CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
				CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
				CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
				CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k];
				CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
				CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
				CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
@@ -9204,6 +9222,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
				calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
				calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
				calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
				calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO;
				calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
				calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
				calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
@@ -9370,6 +9389,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO;
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
			calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
@@ -11286,6 +11306,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
			CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
			CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k];
			CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
			CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
			CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
@@ -11428,6 +11449,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0];
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
			calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
@@ -11560,6 +11582,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
			calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[k];
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
			calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
+5 −0
Original line number Diff line number Diff line
@@ -484,6 +484,8 @@ struct dml2_core_internal_mode_support {
	double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK];
	double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
	double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
	/* oto bw should also be considered when calculating urgent bw to avoid oto/equ mismatches between ms and mp */
	double RequiredPrefetchBWOTO[DML2_MAX_PLANES];
	double cursor_bw[DML2_MAX_PLANES];
	double prefetch_cursor_bw[DML2_MAX_PLANES];
	double prefetch_vmrow_bw[DML2_MAX_PLANES];
@@ -1381,6 +1383,7 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals {
	double vm_row_bw;
	double flip_and_active_bw;
	double flip_and_prefetch_bw;
	double flip_and_prefetch_bw_oto;
	double active_and_excess_bw;
};

@@ -1792,6 +1795,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
	double *VRatioPrefetchC;
	double *RequiredPrefetchPixelDataBWLuma;
	double *RequiredPrefetchPixelDataBWChroma;
	double *RequiredPrefetchBWOTO;
	bool *NotEnoughTimeForDynamicMetadata;
	double *Tno_bw;
	double *Tno_bw_flip;
@@ -2025,6 +2029,7 @@ struct dml2_core_calcs_calculate_peak_bandwidth_required_params {
	double *surface_read_bandwidth_c;
	double *prefetch_bandwidth_l;
	double *prefetch_bandwidth_c;
	double *prefetch_bandwidth_oto;
	double *excess_vactive_fill_bw_l;
	double *excess_vactive_fill_bw_c;
	double *cursor_bw;