Commit 483e0bd8 authored by Nicolin Chen, committed by Will Deacon
Browse files

iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent



It's observed that, when the first 4GB of system memory was reserved, all
VCMDQ allocations failed (even with the smallest qsz in the last attempt):
    arm-smmu-v3: found companion CMDQV device: NVDA200C:00
    arm-smmu-v3: option mask 0x10
    arm-smmu-v3: failed to allocate queue (0x8000 bytes) for vcmdq0
    acpi NVDA200C:00: tegra241_cmdqv: Falling back to standard SMMU CMDQ
    arm-smmu-v3: ias 48-bit, oas 48-bit (features 0x001e1fbf)
    arm-smmu-v3: allocated 524288 entries for cmdq
    arm-smmu-v3: allocated 524288 entries for evtq
    arm-smmu-v3: allocated 524288 entries for priq

This is because the 4GB reserved memory shifted the entire DMA zone from a
lower 32-bit range (on a system without the 4GB carveout) to a higher range,
while dev->coherent_dma_mask was still set to DMA_BIT_MASK(32) by default.

The dma_set_mask_and_coherent() call is done in arm_smmu_device_hw_probe()
of the SMMU driver. So any DMA allocation from tegra241_cmdqv_probe() must
wait until the coherent_dma_mask is correctly set.

Move the vintf/vcmdq structure initialization routine into a different op,
"init_structures". Call it at the end of arm_smmu_init_structures(), where
standard SMMU queues get allocated.

Most of the impl_ops aren't ready until the vintf/vcmdq structures are
initialized. So replace the full impl_ops with an init_ops in
__tegra241_cmdqv_probe().

And switch to tegra241_cmdqv_impl_ops later in arm_smmu_init_structures().
Note that tegra241_cmdqv_impl_ops does not link to the new init_structures
op after this switch, since there is no point in having it once it's done.

Fixes: 918eb5c8 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Reported-by: Matt Ochs <mochs@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/530993c3aafa1b0fc3d879b8119e13c629d12e2b.1725503154.git.nicolinc@nvidia.com


Signed-off-by: Will Deacon <will@kernel.org>
parent 2408b81f
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -3744,7 +3744,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
	if (ret)
		return ret;

	return arm_smmu_init_strtab(smmu);
	ret = arm_smmu_init_strtab(smmu);
	if (ret)
		return ret;

	if (smmu->impl_ops && smmu->impl_ops->init_structures)
		return smmu->impl_ops->init_structures(smmu);

	return 0;
}

static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
+1 −0
Original line number Diff line number Diff line
@@ -643,6 +643,7 @@ struct arm_smmu_strtab_cfg {
/*
 * Implementation-specific hooks, installed through smmu->impl_ops.
 * The table pointer itself may be NULL, and so may individual members;
 * the init_structures call site checks both before calling.
 */
struct arm_smmu_impl_ops {
	int (*device_reset)(struct arm_smmu_device *smmu);
	void (*device_remove)(struct arm_smmu_device *smmu);
	/*
	 * Optional. Called at the end of arm_smmu_init_structures(), after
	 * arm_smmu_init_strtab() has succeeded; its return value becomes the
	 * return value of arm_smmu_init_structures().
	 */
	int (*init_structures)(struct arm_smmu_device *smmu);
	struct arm_smmu_cmdq *(*get_secondary_cmdq)(
		struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
};
+51 −32
Original line number Diff line number Diff line
@@ -755,18 +755,65 @@ tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq)
	return res;
}

/*
 * Set up VINTF0 and preallocate its logical VCMDQs for in-kernel use.
 *
 * Installed as the init_structures impl op, so it runs at the end of
 * arm_smmu_init_structures() rather than at CMDQV probe time.
 *
 * On success, smmu->impl_ops is switched to tegra241_cmdqv_impl_ops.
 * On any failure, whatever was set up here is unwound, the
 * ARM_SMMU_OPT_TEGRA241_CMDQV option is cleared and tegra241_cmdqv_remove()
 * is called.
 *
 * Return: always 0; a CMDQV setup failure is logged and handled by the
 * fallback path, not reported to the caller as an error.
 */
static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
{
	struct tegra241_cmdqv *cmdqv =
		container_of(smmu, struct tegra241_cmdqv, smmu);
	struct tegra241_vintf *vintf0 = kzalloc(sizeof(*vintf0), GFP_KERNEL);
	int nr_done = 0;	/* count of LVCMDQs allocated so far */
	int err;

	if (!vintf0)
		goto out_fallback;

	/* VINTF0 is the one reserved for the kernel */
	err = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf0);
	if (err) {
		dev_err(cmdqv->dev, "failed to init vintf0: %d\n", err);
		goto free_vintf;
	}

	/* Hand every logical VCMDQ slot of VINTF0 a queue up front */
	while (nr_done < cmdqv->num_lvcmdqs_per_vintf) {
		struct tegra241_vcmdq *q;

		q = tegra241_vintf_alloc_lvcmdq(vintf0, nr_done);
		if (IS_ERR(q))
			goto free_lvcmdq;
		nr_done++;
	}

	/* Everything is in place: expose the full set of impl ops */
	smmu->impl_ops = &tegra241_cmdqv_impl_ops;
	return 0;

free_lvcmdq:
	/* Release the already-allocated queues, highest index first */
	while (nr_done-- > 0)
		tegra241_vintf_free_lvcmdq(vintf0, nr_done);
	tegra241_cmdqv_deinit_vintf(cmdqv, vintf0->idx);
free_vintf:
	kfree(vintf0);
out_fallback:
	dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
	smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
	tegra241_cmdqv_remove(smmu);
	return 0;
}

/*
 * debugfs directory for CMDQV; assigned in __tegra241_cmdqv_probe() under
 * CONFIG_IOMMU_DEBUGFS when it is still NULL.
 * NOTE(review): this has external linkage; if nothing outside this file
 * references it, it could be made static -- confirm before changing.
 */
struct dentry *cmdqv_debugfs_dir;

static struct arm_smmu_device *
__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
		       int irq)
{
	static const struct arm_smmu_impl_ops init_ops = {
		.init_structures = tegra241_cmdqv_init_structures,
		.device_remove = tegra241_cmdqv_remove,
	};
	struct tegra241_cmdqv *cmdqv = NULL;
	struct arm_smmu_device *new_smmu;
	struct tegra241_vintf *vintf;
	void __iomem *base;
	u32 regval;
	int lidx;
	int ret;

	static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0);
@@ -815,26 +862,6 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,

	ida_init(&cmdqv->vintf_ids);

	vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
	if (!vintf)
		goto destroy_ids;

	/* Init VINTF0 for in-kernel use */
	ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
	if (ret) {
		dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
		goto free_vintf;
	}

	/* Preallocate logical VCMDQs to VINTF0 */
	for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
		struct tegra241_vcmdq *vcmdq;

		vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
		if (IS_ERR(vcmdq))
			goto free_lvcmdq;
	}

#ifdef CONFIG_IOMMU_DEBUGFS
	if (!cmdqv_debugfs_dir) {
		cmdqv_debugfs_dir =
@@ -844,19 +871,11 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
	}
#endif

	new_smmu->impl_ops = &tegra241_cmdqv_impl_ops;
	/* Provide init-level ops only, until tegra241_cmdqv_init_structures */
	new_smmu->impl_ops = &init_ops;

	return new_smmu;

free_lvcmdq:
	for (lidx--; lidx >= 0; lidx--)
		tegra241_vintf_free_lvcmdq(vintf, lidx);
	tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
free_vintf:
	kfree(vintf);
destroy_ids:
	ida_destroy(&cmdqv->vintf_ids);
	kfree(cmdqv->vintfs);
free_irq:
	if (cmdqv->irq > 0)
		free_irq(cmdqv->irq, cmdqv);