Commit a74f4d99 authored by Karol Wachowski, committed by Jacek Lawrynowicz
Browse files

accel/ivpu: Defer MMU root page table allocation



Defer root page table allocation and unify context init/fini functions.
Move allocation of the root page table out of the file_priv_open function and
perform it lazily during ivpu_bo_pin().

By doing so, we avoid the overhead of allocating page tables for simple
operations like GET_PARAM that do not require them.
Additionally, setting the MMU context descriptor table entry has been
moved to the ivpu_mmu_context_map_sgt function, where it is done the first
time a mapping succeeds for the context.

This change streamlines the process and ensures that the descriptor table
is only initialized when it is actually needed.
Refactor init/fini functions to remove redundant code and make the context
management more straightforward.

Overall, these changes lead to a reduction in the time taken by the file
descriptor open operation, as the costly root page table allocation is now
avoided for operations that do not require it.

Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241017145817.121590-3-jacek.lawrynowicz@linux.intel.com
parent ce68f86c
Loading
Loading
Loading
Loading
+3 −9
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi

		ivpu_cmdq_release_all_locked(file_priv);
		ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
		ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
		ivpu_mmu_context_fini(vdev, &file_priv->ctx);
		file_priv->bound = false;
		drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
	}
@@ -254,9 +254,7 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
		goto err_unlock;
	}

	ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
	if (ret)
		goto err_xa_erase;
	ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id);

	file_priv->default_job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK,
						      (file_priv->ctx.id - 1));
@@ -273,8 +271,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)

	return 0;

err_xa_erase:
	xa_erase_irq(&vdev->context_xa, ctx_id);
err_unlock:
	mutex_unlock(&vdev->context_list_lock);
	mutex_destroy(&file_priv->ms_lock);
@@ -652,9 +648,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
	if (ret)
		goto err_shutdown;

	ret = ivpu_mmu_global_context_init(vdev);
	if (ret)
		goto err_shutdown;
	ivpu_mmu_global_context_init(vdev);

	ret = ivpu_mmu_init(vdev);
	if (ret)
+30 −64
Original line number Diff line number Diff line
@@ -696,7 +696,7 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid)
	return ret;
}

static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid)
{
	struct ivpu_mmu_info *mmu = vdev->mmu;
	struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
@@ -708,8 +708,8 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
		return -EINVAL;

	entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
	drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid);

	if (cd_dma != 0) {
	cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
		FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
		FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
@@ -720,8 +720,7 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
		IVPU_MMU_CD_0_TCR_EPD1 |
		IVPU_MMU_CD_0_AA64 |
		IVPU_MMU_CD_0_R |
			IVPU_MMU_CD_0_ASET |
			IVPU_MMU_CD_0_V;
		IVPU_MMU_CD_0_ASET;
	cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
	cd[2] = 0;
	cd[3] = 0x0000000000007444;
@@ -729,9 +728,9 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
	/* For global context generate memory fault on VPU */
	if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
		cd[0] |= IVPU_MMU_CD_0_A;
	} else {
		memset(cd, 0, sizeof(cd));
	}

	if (valid)
		cd[0] |= IVPU_MMU_CD_0_V;

	WRITE_ONCE(entry[1], cd[1]);
	WRITE_ONCE(entry[2], cd[2]);
@@ -741,8 +740,8 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
	if (!ivpu_is_force_snoop_enabled(vdev))
		clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);

	ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
		 cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
	ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
		 valid ? "valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);

	mutex_lock(&mmu->lock);
	if (!mmu->on)
@@ -758,33 +757,6 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
	return ret;
}

/*
 * Write the context descriptor for SSID 0 so that it points at the root page
 * table of the global context (vdev->gctx).
 *
 * Returns 0 on success; on failure the error from ivpu_mmu_cd_add() is logged
 * and passed back to the caller unchanged.
 */
static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev)
{
	int err = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma);

	if (!err)
		return 0;

	ivpu_err(vdev, "Failed to add global CD entry: %d\n", err);
	return err;
}

/*
 * Write the context descriptor of a user context.
 *
 * @ssid:   stream ID of the context; 0 is rejected with -EINVAL.
 * @cd_dma: DMA address handed to ivpu_mmu_cd_add() for the descriptor.
 *
 * Returns 0 on success, -EINVAL for SSID 0, or the error reported by
 * ivpu_mmu_cd_add(). Every failure is logged.
 */
static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma)
{
	int err;

	if (ssid != 0) {
		err = ivpu_mmu_cd_add(vdev, ssid, cd_dma);
		if (err)
			ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, err);

		return err;
	}

	ivpu_err(vdev, "Invalid SSID: %u\n", ssid);
	return -EINVAL;
}

int ivpu_mmu_init(struct ivpu_device *vdev)
{
	struct ivpu_mmu_info *mmu = vdev->mmu;
@@ -808,12 +780,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
		return ret;
	}

	ret = ivpu_mmu_cd_add_gbl(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
		return ret;
	}

	ret = ivpu_mmu_enable(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
@@ -966,12 +932,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
	REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val);
}

int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
{
	return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma);
	return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true);
}

void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid)
void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid)
{
	ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */
	ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false);
}
+2 −2
Original line number Diff line number Diff line
@@ -40,8 +40,8 @@ struct ivpu_mmu_info {
int ivpu_mmu_init(struct ivpu_device *vdev);
void ivpu_mmu_disable(struct ivpu_device *vdev);
int ivpu_mmu_enable(struct ivpu_device *vdev);
int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid);
int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid);
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);

void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
+75 −70
Original line number Diff line number Diff line
@@ -90,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_
	}
}

static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
	dma_addr_t pgd_dma;

	pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
	if (!pgtable->pgd_dma_ptr)
		return -ENOMEM;

	pgtable->pgd_dma = pgd_dma;

	return 0;
}

static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
	int pgd_idx, pud_idx, pmd_idx;
@@ -140,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
	}

	ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
	pgtable->pgd_dma_ptr = NULL;
	pgtable->pgd_dma = 0;
}

/*
 * Return the CPU pointer to the root (PGD) page of @pgtable, allocating the
 * page on first use.
 *
 * When a root page is already recorded it is returned as is. Otherwise one
 * page is allocated and both pgd_dma_ptr and pgd_dma are filled in. On
 * allocation failure NULL is returned and @pgtable is left untouched.
 */
static u64*
ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
	dma_addr_t root_dma;
	u64 *root;

	if (pgtable->pgd_dma_ptr)
		return pgtable->pgd_dma_ptr;

	root = ivpu_pgtable_alloc_page(vdev, &root_dma);
	if (root) {
		pgtable->pgd_dma_ptr = root;
		pgtable->pgd_dma = root_dma;
	}

	return root;
}

static u64*
@@ -237,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx
	int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
	int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);

	drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID);

	/* Allocate PGD - first level page table if needed */
	if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable))
		return -ENOMEM;

	/* Allocate PUD - second level page table if needed */
	if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
		return -ENOMEM;
@@ -448,12 +462,21 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
		if (ret) {
			ivpu_err(vdev, "Failed to map context pages\n");
			mutex_unlock(&ctx->lock);
			return ret;
			goto err_unlock;
		}
		vpu_addr += size;
	}

	if (!ctx->is_cd_valid) {
		ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable);
		if (ret) {
			ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n",
				 ctx->id, ret);
			goto err_unlock;
		}
		ctx->is_cd_valid = true;
	}

	/* Ensure page table modifications are flushed from wc buffers to memory */
	wmb();

@@ -463,6 +486,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
	if (ret)
		ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
	return ret;

err_unlock:
	mutex_unlock(&ctx->lock);
	return ret;

}

void
@@ -530,20 +558,12 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n
	mutex_unlock(&ctx->lock);
}

static int
ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
{
	u64 start, end;
	int ret;

	mutex_init(&ctx->lock);

	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
	if (ret) {
		ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
		return ret;
	}

	if (!context_id) {
		start = vdev->hw->ranges.global.start;
		end = vdev->hw->ranges.shave.end;
@@ -554,41 +574,59 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3

	drm_mm_init(&ctx->mm, start, end - start);
	ctx->id = context_id;

	return 0;
}

static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
	if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
		return;
	if (ctx->is_cd_valid) {
		ivpu_mmu_cd_clear(vdev, ctx->id);
		ctx->is_cd_valid = false;
	}

	mutex_destroy(&ctx->lock);
	ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
	drm_mm_takedown(&ctx->mm);

	ctx->pgtable.pgd_dma_ptr = NULL;
	ctx->pgtable.pgd_dma = 0;
}

int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
void ivpu_mmu_global_context_init(struct ivpu_device *vdev)
{
	return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
	ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
}

/* Tear down the global MMU context (vdev->gctx). */
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
{
	ivpu_mmu_context_fini(vdev, &vdev->gctx);
}

/*
 * Initialise the reserved MMU context (vdev->rctx).
 *
 * Unlike contexts whose root page table is allocated on first mapping, the
 * reserved context gets its root page table and its context descriptor here,
 * while holding the context lock.
 *
 * Returns 0 on success, -ENOMEM if the root page table cannot be allocated,
 * or the error from ivpu_mmu_cd_set(). On any failure the context is torn
 * down again, so nothing set up by this function is left behind.
 */
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
{
	int ret;

	ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);

	mutex_lock(&vdev->rctx.lock);

	if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) {
		ivpu_err(vdev, "Failed to allocate root page table for reserved context\n");
		ret = -ENOMEM;
		goto err_ctx_fini;
	}

	ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable);
	if (ret) {
		ivpu_err(vdev, "Failed to set context descriptor for reserved context\n");
		goto err_ctx_fini;
	}

	mutex_unlock(&vdev->rctx.lock);
	return 0;

err_ctx_fini:
	/* Release the lock before the teardown: ivpu_mmu_context_fini() destroys it. */
	mutex_unlock(&vdev->rctx.lock);
	ivpu_mmu_context_fini(vdev, &vdev->rctx);
	return ret;
}

/*
 * Tear down the reserved MMU context (vdev->rctx).
 *
 * The context descriptor is cleared explicitly here:
 * ivpu_mmu_reserved_context_init() installs it without setting
 * ctx->is_cd_valid, so ivpu_mmu_context_fini() would not clear it itself.
 */
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
{
	ivpu_mmu_cd_clear(vdev, vdev->rctx.id);
	ivpu_mmu_context_fini(vdev, &vdev->rctx);
}

void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
@@ -603,36 +641,3 @@ void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)

	xa_unlock(&vdev->context_xa);
}

/*
 * Initialise a user MMU context and install its page table for @ctx_id.
 *
 * A zero @ctx_id triggers a warning. If installing the page table fails, the
 * freshly initialised context is torn down again before returning.
 *
 * Returns 0 on success or the error from ivpu_mmu_context_init() /
 * ivpu_mmu_set_pgtable(); each failure is logged.
 */
int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
{
	int err;

	drm_WARN_ON(&vdev->drm, !ctx_id);

	err = ivpu_mmu_context_init(vdev, ctx, ctx_id);
	if (err) {
		ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, err);
		return err;
	}

	err = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
	if (!err)
		return 0;

	ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, err);
	ivpu_mmu_context_fini(vdev, ctx);
	return err;
}

void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
	drm_WARN_ON(&vdev->drm, !ctx->id);

	ivpu_mmu_clear_pgtable(vdev, ctx->id);
	ivpu_mmu_context_fini(vdev, ctx);
}
+5 −4
Original line number Diff line number Diff line
@@ -23,19 +23,20 @@ struct ivpu_mmu_pgtable {
};

/* Per-context MMU state: address-space allocator, page tables and descriptor status. */
struct ivpu_mmu_context {
	struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */
	struct drm_mm mm; /* Range allocator for this context's VPU addresses */
	struct ivpu_mmu_pgtable pgtable; /* Page tables; the root page may be allocated lazily */
	bool is_cd_valid; /* True once the context descriptor has been set for this context */
	u32 id; /* Context ID, passed as the SSID when setting/clearing the descriptor */
};

int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id);
void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_global_context_init(struct ivpu_device *vdev);
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);

int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);

int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,