Commit e93ec872 authored by Juergen Gross
Browse files

x86/xen: allow larger contiguous memory regions in PV guests



Today a PV guest (including dom0) can create 2MB contiguous memory
regions for DMA buffers at max. This has led to problems at least
with the megaraid_sas driver, which wants to allocate a 2.3MB DMA
buffer.

The limiting factor is the frame array used to do the hypercall for
making the memory contiguous, which has 512 entries and is just a
static array in mmu_pv.c.

In order to not waste memory for non-PV guests, put the initial
frame array into .init.data section and dynamically allocate an array
from the .init_after_bootmem hook of PV guests.

In case a contiguous memory area larger than the initially supported
2MB is requested, allocate a larger buffer for the frame list. Note
that such an allocation is tried only after memory management has been
initialized properly, which is tested via a flag being set in the
.init_after_bootmem hook.

Fixes: 9f40ec84 ("xen/swiotlb: add alignment check for dma buffers")
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Alan Robinson <Alan.Robinson@fujitsu.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
parent 85fcb57c
Loading
Loading
Loading
Loading
+62 −9
Original line number Diff line number Diff line
@@ -111,6 +111,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
 */
static DEFINE_SPINLOCK(xen_reservation_lock);

/*
 * Frame list state used when making a memory region contiguous or
 * discontiguous again.
 *
 * Protected by xen_reservation_lock.
 */
/* Smallest supported order: 2^9 frame entries, i.e. a 2MB region. */
#define MIN_CONTIG_ORDER 9 /* 2MB */
/* Largest region order the current discontig_frames array can hold. */
static unsigned int discontig_frames_order = MIN_CONTIG_ORDER;
/*
 * Initial frame array, placed in init data so that it does not occupy
 * memory permanently; alloc_discontig_frames() asserts that it is exactly
 * one page in size.
 */
static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata;
/*
 * Frame array currently in use.  Starts out pointing at the early array
 * and is switched to a page-allocator buffer by alloc_discontig_frames().
 */
static unsigned long *discontig_frames __refdata = discontig_frames_early;
/* Set once discontig_frames refers to a dynamically allocated array. */
static bool discontig_frames_dyn;

/*
 * Make sure the frame array is dynamically allocated and large enough to
 * describe a region of 2^order pages.
 *
 * A new array is allocated before taking xen_reservation_lock.  Under the
 * lock it is installed if it is larger than the current one, or if the
 * current one is still the early init-data array.  Otherwise the current
 * array is kept and the new allocation is released again.
 *
 * @order: region order the array must support; must not be smaller than
 *         MIN_CONTIG_ORDER.
 *
 * Returns 0 on success (including the case where the existing array was
 * already sufficient) or -ENOMEM if the allocation failed.
 */
static int alloc_discontig_frames(unsigned int order)
{
	unsigned long *new_array, *free_array;
	unsigned int free_order;
	unsigned long flags;

	BUG_ON(order < MIN_CONTIG_ORDER);
	/*
	 * The page order computations below rely on a MIN_CONTIG_ORDER
	 * sized array filling exactly one page.
	 */
	BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE);

	new_array = (unsigned long *)__get_free_pages(GFP_KERNEL,
						      order - MIN_CONTIG_ORDER);
	if (!new_array)
		return -ENOMEM;

	spin_lock_irqsave(&xen_reservation_lock, flags);

	if (order > discontig_frames_order || !discontig_frames_dyn) {
		/*
		 * Replace the current array.  The early array is not a page
		 * allocator buffer, so it must never be passed to
		 * free_pages(); only a previous dynamic array is freed.
		 */
		free_array = discontig_frames_dyn ? discontig_frames : NULL;
		free_order = discontig_frames_order;

		discontig_frames = new_array;
		discontig_frames_order = order;
		discontig_frames_dyn = true;
	} else {
		/*
		 * The installed array is already big enough (e.g. a
		 * concurrent caller installed a larger one first).  Drop the
		 * new allocation, using the order it was allocated with and
		 * not the possibly larger order of the installed array.
		 */
		free_array = new_array;
		free_order = order;
	}

	spin_unlock_irqrestore(&xen_reservation_lock, flags);

	/* free_pages() does nothing when passed a zero address. */
	free_pages((unsigned long)free_array, free_order - MIN_CONTIG_ORDER);

	return 0;
}

/*
 * Note about cr3 (pagetable base) values:
 *
@@ -814,6 +859,9 @@ static void __init xen_after_bootmem(void)
	SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);

	if (alloc_discontig_frames(MIN_CONTIG_ORDER))
		BUG();
}

static void xen_unpin_page(struct mm_struct *mm, struct page *page,
@@ -2203,10 +2251,6 @@ void __init xen_init_mmu_ops(void)
	memset(dummy_mapping, 0xff, PAGE_SIZE);
}

/* Protected by xen_reservation_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];

#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
				unsigned long *in_frames,
@@ -2323,18 +2367,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
				 unsigned int address_bits,
				 dma_addr_t *dma_handle)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long *in_frames, out_frame;
	unsigned long  flags;
	int            success;
	unsigned long vstart = (unsigned long)phys_to_virt(pstart);

	if (unlikely(order > MAX_CONTIG_ORDER))
	if (unlikely(order > discontig_frames_order)) {
		if (!discontig_frames_dyn)
			return -ENOMEM;

		if (alloc_discontig_frames(order))
			return -ENOMEM;
	}

	memset((void *) vstart, 0, PAGE_SIZE << order);

	spin_lock_irqsave(&xen_reservation_lock, flags);

	in_frames = discontig_frames;

	/* 1. Zap current PTEs, remembering MFNs. */
	xen_zap_pfn_range(vstart, order, in_frames, NULL);

@@ -2358,12 +2409,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,

void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long *out_frames, in_frame;
	unsigned long  flags;
	int success;
	unsigned long vstart;

	if (unlikely(order > MAX_CONTIG_ORDER))
	if (unlikely(order > discontig_frames_order))
		return;

	vstart = (unsigned long)phys_to_virt(pstart);
@@ -2371,6 +2422,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)

	spin_lock_irqsave(&xen_reservation_lock, flags);

	out_frames = discontig_frames;

	/* 1. Find start MFN of contiguous extent. */
	in_frame = virt_to_mfn((void *)vstart);