Merge branch 'drm-intel-next' of git://people.freedesktop.org/~danvet/drm-intel into drm-core-next

Daniel Vetter wrote First pull request for 3.5-next, slightly large than usual because new things kept coming in since the last pull for 3.4. Highlights: - first batch of hw enablement for vlv (Jesse et al) and hsw (Eugeni). pci ids are not yet added, and there's still quite a few patches to merge (mostly modesetting). To make QA easier I've decided to merge this stuff in pieces. - loads of cleanups and prep patches spurred by the above. Especially vlv is a real frankenstein chip, but also hsw is stretching our driver's code design. Expect more to come in this area for 3.5. - more gmbus fixes, cleanups and improvements by Daniel Kurtz. Again, there are more patches needed (and some already queued up), but I wanted to split this a bit for better testing. - pwrite/pread rework and retuning. This series has been in the works for a few months already and a lot of i-g-t tests have been created for it. Now it's finally ready to be merged. Note that one patch in this series touches include/pagemap.h, that patch is acked-by akpm. - reduce mappable pressure and relocation throughput improvements from Chris. - mmap offset exhaustion mitigation by Chris Wilson. - a start at figuring out which codepaths in our messy dri1/ums+gem/kms driver we actually need to support by bailing out of unsupported case. The driver now refuses to load without kms on gen6+ and disallows a few ioctls that userspace never used in certain cases. More of this will definitely come. - More decoupling of global gtt and ppgtt. - Improved dual-link lvds detection by Takashi Iwai. - Shut up the compiler + plus fix the fallout (Ben) - Inverted panel brightness handling (mostly Acer manages to break things in this way). - Small fixlets and adjustements and some minor things to help debugging. Regression-wise QA reported quite a few issues on ivb, but all of them turned out to be hw stability issues which are already fixed in drm-intel-fixes (QA runs the nightly regression tests on -next alone, without -fixes automatically merged in). There's still one issue open on snb, it looks like occlusion query writes are not quite as cache coherent as we've expected. With some of the pwrite adjustements we can now reliably hit this. Kernel workaround for it is in the works." * 'drm-intel-next' of git://people.freedesktop.org/~danvet/drm-intel: (101 commits) drm/i915: VCS is not the last ring drm/i915: Add a dual link lvds quirk for MacBook Pro 8,2 drm/i915: make quirks more verbose drm/i915: dump the DMA fetch addr register on pre-gen6 drm/i915/sdvo: Include YRPB as an additional TV output type drm/i915: disallow gem init ioctl on ilk drm/i915: refuse to load on gen6+ without kms drm/i915: extract gt interrupt handler drm/i915: use render gen to switch ring irq functions drm/i915: rip out old HWSTAM missed irq WA for vlv drm/i915: open code gen6+ ring irqs drm/i915: ring irq cleanups drm/i915: add SFUSE_STRAP registers for digital port detection drm/i915: add WM_LINETIME registers drm/i915: add WRPLL clocks drm/i915: add LCPLL control registers drm/i915: add SSC offsets for SBI access drm/i915: add port clock selection support for HSW drm/i915: add S PLL control drm/i915: add PIXCLK_GATE register ... Conflicts: drivers/char/agp/intel-agp.h drivers/char/agp/intel-gtt.c drivers/gpu/drm/i915/i915_debugfs.c
2026-04-23 05:56:14 -04:00 · 2012-04-12 10:27:01 +01:00
parent 6a7068b4ef ec34a01de3
commit effbc4fd8e
33 changed files with 2584 additions and 1134 deletions
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb)
 	kfree(eb);
 }

+static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+{
+	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+		obj->cache_level != I915_CACHE_NONE);
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_objects *eb,
@@ -273,6 +279,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_gem_object *target_obj;
+	struct drm_i915_gem_object *target_i915_obj;
 	uint32_t target_offset;
 	int ret = -EINVAL;

@@ -281,7 +288,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (unlikely(target_obj == NULL))
 		return -ENOENT;

-	target_offset = to_intel_bo(target_obj)->gtt_offset;
+	target_i915_obj = to_intel_bo(target_obj);
+	target_offset = target_i915_obj->gtt_offset;

 	/* The target buffer should have appeared before us in the
 	 * exec_object list, so it should have a GTT space bound by now.
@@ -352,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return ret;
 	}

+	/* We can't wait for rendering with pagefaults disabled */
+	if (obj->active && in_atomic())
+		return -EFAULT;
+
 	reloc->delta += target_offset;
-	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+	if (use_cpu_reloc(obj)) {
 		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 		char *vaddr;

+		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+		if (ret)
+			return ret;
+
 		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
 		kunmap_atomic(vaddr);
@@ -365,10 +381,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		uint32_t __iomem *reloc_entry;
 		void __iomem *reloc_page;

-		/* We can't wait for rendering with pagefaults disabled */
-		if (obj->active && in_atomic())
-			return -EFAULT;
-
 		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 		if (ret)
 			return ret;
@@ -383,6 +395,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		io_mapping_unmap_atomic(reloc_page);
 	}

+	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
+	 * pipe_control writes because the gpu doesn't properly redirect them
+	 * through the ppgtt for non_secure batchbuffers. */
+	if (unlikely(IS_GEN6(dev) &&
+	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+	    !target_i915_obj->has_global_gtt_mapping)) {
+		i915_gem_gtt_bind_object(target_i915_obj,
+					 target_i915_obj->cache_level);
+	}
+
 	/* and update the user's relocation entry */
 	reloc->presumed_offset = target_offset;

@@ -393,30 +415,46 @@ static int
 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 				    struct eb_objects *eb)
 {
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *user_relocs;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-	int i, ret;
+	int remain, ret;

 	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
-	for (i = 0; i < entry->relocation_count; i++) {
-		struct drm_i915_gem_relocation_entry reloc;

-		if (__copy_from_user_inatomic(&reloc,
-					      user_relocs+i,
-					      sizeof(reloc)))
+	remain = entry->relocation_count;
+	while (remain) {
+		struct drm_i915_gem_relocation_entry *r = stack_reloc;
+		int count = remain;
+		if (count > ARRAY_SIZE(stack_reloc))
+			count = ARRAY_SIZE(stack_reloc);
+		remain -= count;
+
+		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
 			return -EFAULT;

-		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
-		if (ret)
-			return ret;
+		do {
+			u64 offset = r->presumed_offset;

-		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
-					    &reloc.presumed_offset,
-					    sizeof(reloc.presumed_offset)))
-			return -EFAULT;
+			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
+			if (ret)
+				return ret;
+
+			if (r->presumed_offset != offset &&
+			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
+						    &r->presumed_offset,
+						    sizeof(r->presumed_offset))) {
+				return -EFAULT;
+			}
+
+			user_relocs++;
+			r++;
+		} while (--count);
 	}

 	return 0;
+#undef N_RELOC
 }

 static int
@@ -464,6 +502,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,

 #define  __EXEC_OBJECT_HAS_FENCE (1<<31)

+static int
+need_reloc_mappable(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	return entry->relocation_count && !use_cpu_reloc(obj);
+}
+
 static int
 pin_and_fence_object(struct drm_i915_gem_object *obj,
 		     struct intel_ring_buffer *ring)
@@ -477,8 +522,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable =
-		entry->relocation_count ? true : need_fence;
+	need_mappable = need_fence || need_reloc_mappable(obj);

 	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
 	if (ret)
@@ -535,8 +579,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
-		need_mappable =
-			entry->relocation_count ? true : need_fence;
+		need_mappable = need_fence || need_reloc_mappable(obj);

 		if (need_mappable)
 			list_move(&obj->exec_list, &ordered_objects);
@@ -576,8 +619,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 				has_fenced_gpu_access &&
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable =
-				entry->relocation_count ? true : need_fence;
+			need_mappable = need_fence || need_reloc_mappable(obj);

 			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
 			    (need_mappable && !obj->map_and_fenceable))
@@ -955,7 +997,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 		if (!access_ok(VERIFY_WRITE, ptr, length))
 			return -EFAULT;

-		if (fault_in_pages_readable(ptr, length))
+		if (fault_in_multipages_readable(ptr, length))
 			return -EFAULT;
 	}