Commit c7940c8b authored by Tim Kovalenko, committed by Danilo Krummrich
Browse files

gpu: nova-core: fix stack overflow in GSP memory allocation



The `Cmdq::new` function was allocating a `PteArray` struct on the stack
and was causing a stack overflow with 8216 bytes.

Modify the `PteArray` to calculate and write the Page Table Entries
directly into the coherent DMA buffer one-by-one. This reduces the stack
usage quite a lot.

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
Link: https://lore.kernel.org/rust-for-linux/CANiq72mAQxbRJZDnik3Qmd4phvFwPA01O2jwaaXRh_T+2=L-qA@mail.gmail.com/


Fixes: f38b4f10 ("gpu: nova-core: Create initial Gsp")
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Tim Kovalenko <tim.kovalenko@proton.me>
Link: https://patch.msgid.link/20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me


[ * Use PteArray::entry() in LogBuffer::new(),
  * Add TODO comment to use IoView projections once available,
  * Add PTE_ARRAY_SIZE constant to avoid duplication.

    - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
parent 4da879a0
Loading
Loading
Loading
Loading
+17 −15
Original line number Diff line number Diff line
@@ -47,16 +47,12 @@
unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}

impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
    /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`.
    fn new(start: DmaAddress) -> Result<Self> {
        let mut ptes = [0u64; NUM_PAGES];
        for (i, pte) in ptes.iter_mut().enumerate() {
            *pte = start
                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
                .ok_or(EOVERFLOW)?;
        }

        Ok(Self(ptes))
    /// Computes the page table entry at position `index` of a mapping whose first entry is
    /// `start`.
    ///
    /// The result is `start` plus `index << GSP_PAGE_SHIFT`. Returns `EOVERFLOW` if that
    /// addition overflows.
    // TODO: Replace with `IoView` projection once available.
    fn entry(start: DmaAddress, index: usize) -> Result<u64> {
        let offset = num::usize_as_u64(index) << GSP_PAGE_SHIFT;

        match start.checked_add(offset) {
            Some(pte) => Ok(pte),
            None => Err(EOVERFLOW),
        }
    }
}

@@ -86,16 +82,22 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
            NUM_PAGES * GSP_PAGE_SIZE,
            GFP_KERNEL | __GFP_ZERO,
        )?);
        let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;

        let start_addr = obj.0.dma_handle();

        // SAFETY: `obj` has just been created and we are its sole user.
        unsafe {
            // Copy the self-mapping PTE at the expected location.
        let pte_region = unsafe {
            obj.0
                .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
                .copy_from_slice(ptes.as_bytes())
                .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
        };

        // Write values one by one to avoid an on-stack instance of `PteArray`.
        for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
            let pte_value = PteArray::<0>::entry(start_addr, i)?;

            chunk.copy_from_slice(&pte_value.to_ne_bytes());
        }

        Ok(obj)
    }
}
+12 −2
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ struct Msgq {
#[repr(C)]
struct GspMem {
    /// Self-mapping page table entries.
    ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
    ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>,
    /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
    /// write and read pointers that the CPU updates.
    ///
@@ -172,6 +172,10 @@ struct GspMem {
    gspq: Msgq,
}

impl GspMem {
    /// Number of entries in the `ptes` array: how many `u64` values fit in `GSP_PAGE_SIZE` bytes.
    const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>();
}

// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
// that is not a problem because they are not used outside the kernel.
unsafe impl AsBytes for GspMem {}
@@ -201,7 +205,13 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {

        let gsp_mem =
            CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
        dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);

        let start = gsp_mem.dma_handle();
        // Write values one by one to avoid an on-stack instance of `PteArray`.
        for i in 0..GspMem::PTE_ARRAY_SIZE {
            dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?);
        }

        dma_write!(
            gsp_mem,
            [0]?.cpuq.tx,