Commit 2e161017 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel into drm-next

4 weeks worth of stuff since I was traveling&lazy:

- lspcon improvements (Imre)
- proper atomic state for cdclk handling (Ville)
- gpu reset improvements (Chris)
- lots and lots of polish around fences, requests, waiting and
  everything related all over (both gem and modeset code), from Chris
- atomic by default on gen5+ minus byt/bsw (Maarten did the patch to
  flip the default, really this is a massive joint team effort)
- moar power domains, now 64bit (Ander)
- big pile of in-kernel unit tests for various gem subsystems (Chris),
  including simple mock objects for i915 device and the ggtt
  manager.
- i915_gpu_info in debugfs, for taking a snapshot of the current gpu
  state. Same thing as i915_error_state, but useful if the kernel didn't
  notice something is stuck. From Chris.
- bxt dsi fixes (Umar Shankar)
- bxt w/a updates (Jani)
- no more struct_mutex for gem object unreference (Chris)
- some execlist refactoring (Tvrtko)
- color manager support for glk (Ander)
- improve the power-well sync code to better take over from the
  firmware (Imre)
- gem tracepoint polish (Tvrtko)
- lots of glk fixes all around (Ander)
- ctx switch improvements (Chris)
- glk dsi support&fixes (Deepak M)
- dsi fixes for vlv and cleanups, lots of them (Hans de Goede)
- switch to i915.ko types in lots of our internal modeset code (Ander)
- byt/bsw atomic wm update code, yay (Ville)

* tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel: (432 commits)
  drm/i915: Update DRIVER_DATE to 20170306
  drm/i915: Don't use enums for hardware engine id
  drm/i915: Split breadcrumbs spinlock into two
  drm/i915: Refactor wakeup of the next breadcrumb waiter
  drm/i915: Take reference for signaling the request from hardirq
  drm/i915: Add FIFO underrun tracepoints
  drm/i915: Add cxsr toggle tracepoint
  drm/i915: Add VLV/CHV watermark/FIFO programming tracepoints
  drm/i915: Add plane update/disable tracepoints
  drm/i915: Kill level 0 wm hack for VLV/CHV
  drm/i915: Workaround VLV/CHV sprite1->sprite0 enable underrun
  drm/i915: Sanitize VLV/CHV watermarks properly
  drm/i915: Only use update_wm_{pre,post} for pre-ilk platforms
  drm/i915: Nuke crtc->wm.cxsr_allowed
  drm/i915: Compute proper intermediate wms for vlv/cvh
  drm/i915: Skip useless watermark/FIFO related work on VLV/CHV when not needed
  drm/i915: Compute vlv/chv wms the atomic way
  drm/i915: Compute VLV/CHV FIFO sizes based on the PM2 watermarks
  drm/i915: Plop vlv/chv fifo sizes into crtc state
  drm/i915: Plop vlv wm state into crtc_state
  ...
parents b558dfd5 505b6815
......@@ -222,6 +222,15 @@ Video BIOS Table (VBT)
.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
:internal:
Display clocks
--------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
:doc: CDCLK / RAWCLK
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
:internal:
Display PLLs
------------
......
......@@ -526,6 +526,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
};
static void __init
......
......@@ -332,14 +332,6 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry,
writel_relaxed(addr | pte_flags, intel_private.gtt + entry);
}
static const struct aper_size_info_fixed intel_fake_agp_sizes[] = {
{32, 8192, 3},
{64, 16384, 4},
{128, 32768, 5},
{256, 65536, 6},
{512, 131072, 7},
};
static unsigned int intel_gtt_stolen_size(void)
{
u16 gmch_ctrl;
......@@ -670,6 +662,14 @@ static int intel_gtt_init(void)
}
#if IS_ENABLED(CONFIG_AGP_INTEL)
static const struct aper_size_info_fixed intel_fake_agp_sizes[] = {
{32, 8192, 3},
{64, 16384, 4},
{128, 32768, 5},
{256, 65536, 6},
{512, 131072, 7},
};
static int intel_fake_agp_fetch_size(void)
{
int num_sizes = ARRAY_SIZE(intel_fake_agp_sizes);
......
......@@ -19,6 +19,7 @@ config DRM_I915
select INPUT if ACPI
select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI
select SYNC_FILE
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
......
......@@ -24,7 +24,9 @@ config DRM_I915_DEBUG
select X86_MSR # used by igt/pm_rpm
select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
select DRM_DEBUG_MM if DRM=y
select DRM_DEBUG_MM_SELFTEST
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
default n
help
Choose this option to turn on extra driver debugging that may affect
......@@ -58,3 +60,30 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
Recommended for driver developers only.
If in doubt, say "N".
config DRM_I915_SELFTEST
bool "Enable selftests upon driver load"
depends on DRM_I915
default n
select FAULT_INJECTION
select PRIME_NUMBERS
help
Choose this option to allow the driver to perform selftests upon
loading; also requires the i915.selftest=1 module parameter. To
exit the module after running the selftests (i.e. to prevent normal
module initialisation afterwards) use i915.selftest=-1.
Recommended for driver developers only.
If in doubt, say "N".
config DRM_I915_LOW_LEVEL_TRACEPOINTS
bool "Enable low level request tracing events"
depends on DRM_I915
default n
help
Choose this option to turn on low level request tracing events.
This provides the ability to precisely monitor engine utilisation
and also analyze the request dependency resolving timeline.
If in doubt, say "N".
......@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
# GEM code
i915-y += i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_clflush.o \
i915_gem_context.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
......@@ -72,6 +73,7 @@ i915-y += intel_audio.o \
intel_atomic.o \
intel_atomic_plane.o \
intel_bios.o \
intel_cdclk.o \
intel_color.o \
intel_display.o \
intel_dpio_phy.o \
......@@ -116,6 +118,9 @@ i915-y += dvo_ch7017.o \
# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/i915_random.o \
selftests/i915_selftest.o
# virtual gpu code
i915-y += i915_vgpu.o
......
......@@ -1530,7 +1530,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
len += copy_len;
gma += copy_len;
}
return 0;
return len;
}
......@@ -1644,7 +1644,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
gma, gma + bb_size,
dst);
if (ret) {
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
goto unmap_src;
}
......@@ -2608,11 +2608,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
int ring_id = workload->ring_id;
struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
struct intel_ring *ring = shadow_ctx->engine[ring_id].ring;
unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
unsigned int copy_len = 0;
u32 *cs;
int ret;
guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
......@@ -2626,36 +2623,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
gma_top = workload->rb_start + guest_rb_size;
/* allocate shadow ring buffer */
ret = intel_ring_begin(workload->req, workload->rb_len / 4);
if (ret)
return ret;
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs))
return PTR_ERR(cs);
/* get shadow ring buffer va */
workload->shadow_ring_buffer_va = ring->vaddr + ring->tail;
workload->shadow_ring_buffer_va = cs;
/* head > tail --> copy head <-> top */
if (gma_head > gma_tail) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_top,
workload->shadow_ring_buffer_va);
if (ret) {
gma_head, gma_top, cs);
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
return ret;
}
copy_len = gma_top - gma_head;
cs += ret / sizeof(u32);
gma_head = workload->rb_start;
}
/* copy head or start <-> tail */
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_tail,
workload->shadow_ring_buffer_va + copy_len);
if (ret) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
return ret;
}
ring->tail += workload->rb_len;
intel_ring_advance(ring);
cs += ret / sizeof(u32);
intel_ring_advance(workload->req, cs);
return 0;
}
......@@ -2709,7 +2703,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx->workload->vgpu->gtt.ggtt_mm,
guest_gma, guest_gma + ctx_size,
map);
if (ret) {
if (ret < 0) {
gvt_err("fail to copy guest indirect ctx\n");
goto unmap_src;
}
......
This diff is collapsed.
......@@ -43,6 +43,7 @@
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
......@@ -248,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_IRQ_ACTIVE:
case I915_PARAM_ALLOW_BATCHBUFFER:
case I915_PARAM_LAST_DISPATCH:
case I915_PARAM_HAS_EXEC_CONSTANTS:
/* Reject all old ums/dri params. */
return -ENODEV;
case I915_PARAM_CHIPSET_ID:
......@@ -274,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_BSD2:
value = !!dev_priv->engine[VCS2];
break;
case I915_PARAM_HAS_EXEC_CONSTANTS:
value = INTEL_GEN(dev_priv) >= 4;
break;
case I915_PARAM_HAS_LLC:
value = HAS_LLC(dev_priv);
break;
......@@ -318,10 +317,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
/* The register is already force-woken. We dont need
* any rpm here
*/
intel_runtime_pm_get(dev_priv);
value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
intel_runtime_pm_put(dev_priv);
break;
case I915_PARAM_MMAP_GTT_VERSION:
/* Though we've started our numbering from 1, and so class all
......@@ -350,6 +348,8 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN:
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
......@@ -756,6 +756,15 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
return -ENOMEM;
}
static void i915_engines_cleanup(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, i915, id)
kfree(engine);
}
static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->hotplug.dp_wq);
......@@ -769,10 +778,17 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
*/
static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv)
{
if (IS_HSW_EARLY_SDV(dev_priv) ||
IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0))
bool pre = false;
pre |= IS_HSW_EARLY_SDV(dev_priv);
pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0);
pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST);
if (pre) {
DRM_ERROR("This is a pre-production stepping. "
"It may not be fully functional.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
}
}
/**
......@@ -808,6 +824,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
spin_lock_init(&dev_priv->gpu_error.lock);
mutex_init(&dev_priv->backlight_lock);
spin_lock_init(&dev_priv->uncore.lock);
spin_lock_init(&dev_priv->mm.object_stat_lock);
spin_lock_init(&dev_priv->mmio_flip_lock);
spin_lock_init(&dev_priv->wm.dsparb_lock);
......@@ -818,12 +835,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
mutex_init(&dev_priv->pps_mutex);
intel_uc_init_early(dev_priv);
i915_memcpy_init_early(dev_priv);
ret = intel_engines_init_early(dev_priv);
if (ret)
return ret;
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
return ret;
goto err_engines;
/* This must be called before any calls to HAS_PCH_* */
intel_detect_pch(dev_priv);
......@@ -852,6 +872,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
err_workqueues:
i915_workqueues_cleanup(dev_priv);
err_engines:
i915_engines_cleanup(dev_priv);
return ret;
}
......@@ -864,6 +886,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
i915_perf_fini(dev_priv);
i915_gem_load_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
i915_engines_cleanup(dev_priv);
}
static int i915_mmio_setup(struct drm_i915_private *dev_priv)
......@@ -930,6 +953,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
goto put_bridge;
intel_uncore_init(dev_priv);
i915_gem_init_mmio(dev_priv);
return 0;
......@@ -967,7 +991,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores));
DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores));
}
/**
......@@ -1185,11 +1209,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
*/
int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct intel_device_info *match_info =
(struct intel_device_info *)ent->driver_data;
struct drm_i915_private *dev_priv;
int ret;
if (i915.nuclear_pageflip)
driver.driver_features |= DRIVER_ATOMIC;
/* Enable nuclear pageflip on ILK+, except vlv/chv */
if (!i915.nuclear_pageflip &&
(match_info->gen < 5 || match_info->has_gmch_display))
driver.driver_features &= ~DRIVER_ATOMIC;
ret = -ENOMEM;
dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
......@@ -1197,8 +1225,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev);
if (ret) {
DRM_DEV_ERROR(&pdev->dev, "allocation failed\n");
kfree(dev_priv);
return ret;
goto out_free;
}
dev_priv->drm.pdev = pdev;
......@@ -1206,7 +1233,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_enable_device(pdev);
if (ret)
goto out_free_priv;
goto out_fini;
pci_set_drvdata(pdev, &dev_priv->drm);
......@@ -1270,9 +1297,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
i915_driver_cleanup_early(dev_priv);
out_pci_disable:
pci_disable_device(pdev);
out_free_priv:
out_fini:
i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret);
drm_dev_unref(&dev_priv->drm);
drm_dev_fini(&dev_priv->drm);
out_free:
kfree(dev_priv);
return ret;
}
......@@ -1280,6 +1309,8 @@ void i915_driver_unload(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = dev_priv->drm.pdev;
struct drm_modeset_acquire_ctx ctx;
int ret;
intel_fbdev_fini(dev);
......@@ -1288,6 +1319,24 @@ void i915_driver_unload(struct drm_device *dev)
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
drm_modeset_acquire_init(&ctx, 0);
while (1) {
ret = drm_modeset_lock_all_ctx(dev, &ctx);
if (!ret)
ret = drm_atomic_helper_disable_all(dev, &ctx);
if (ret != -EDEADLK)
break;
drm_modeset_backoff(&ctx);
}
if (ret)
DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret);
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
intel_gvt_cleanup(dev_priv);
i915_driver_unregister(dev_priv);
......@@ -1317,7 +1366,7 @@ void i915_driver_unload(struct drm_device *dev)
/* Free error state after interrupts are fully disabled. */
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_destroy_error_state(dev_priv);
i915_reset_error_state(dev_priv);
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
......@@ -1333,8 +1382,16 @@ void i915_driver_unload(struct drm_device *dev)
i915_driver_cleanup_mmio(dev_priv);
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
}
/*
 * drm_driver.release callback: undo the early driver initialisation,
 * finalise the embedded drm_device and then free the containing
 * drm_i915_private allocation.
 */
static void i915_driver_release(struct drm_device *dev)
{
	struct drm_i915_private *i915 = to_i915(dev);

	i915_driver_cleanup_early(i915);
	drm_dev_fini(&i915->drm);

	kfree(i915);
}
static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
......@@ -1716,6 +1773,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
!(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload))
intel_power_domains_init_hw(dev_priv, true);
i915_gem_sanitize(dev_priv);
enable_rpm_wakeref_asserts(dev_priv);
out:
......@@ -1787,7 +1846,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
goto error;
}
i915_gem_reset_finish(dev_priv);
i915_gem_reset(dev_priv);
intel_overlay_reset(dev_priv);
/* Ok, now get things going again... */
......@@ -1813,6 +1872,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
i915_queue_hangcheck(dev_priv);
wakeup:
i915_gem_reset_finish(dev_priv);
enable_irq(dev_priv->drm.irq);
wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
return;
......@@ -2532,7 +2592,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
......@@ -2574,7 +2634,8 @@ static struct drm_driver driver = {
*/
.driver_features =
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |
DRIVER_RENDER | DRIVER_MODESET,
DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC,
.release = i915_driver_release,
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
.preclose = i915_driver_preclose,
......@@ -2603,3 +2664,7 @@ static struct drm_driver driver = {
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
};
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_drm.c"
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -28,9 +28,18 @@
/*
 * GEM debugging helpers.
 *
 * With CONFIG_DRM_I915_DEBUG_GEM set, GEM_BUG_ON()/GEM_WARN_ON() forward to
 * BUG_ON()/WARN_ON(), GEM_DEBUG_DECL() emits its declaration,
 * GEM_DEBUG_EXEC() emits its expression and GEM_DEBUG_BUG_ON() is a
 * GEM_BUG_ON().
 *
 * Without it, GEM_BUG_ON() and GEM_WARN_ON() map onto
 * BUILD_BUG_ON_INVALID() (GEM_WARN_ON() then evaluates to 0), and the
 * GEM_DEBUG_*() helpers expand to nothing / an empty statement.
 */
#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) BUG_ON(expr)
#define GEM_WARN_ON(expr) WARN_ON(expr)
#define GEM_DEBUG_DECL(var) var
#define GEM_DEBUG_EXEC(expr) expr
#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr)
#else
#define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
#define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0)
#define GEM_DEBUG_DECL(var)
#define GEM_DEBUG_EXEC(expr) do { } while (0)
#define GEM_DEBUG_BUG_ON(expr)
#endif
#define I915_NUM_ENGINES 5
......
......@@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (tmp->base.size >= size) {
/* Clear the set of shared fences early */
ww_mutex_lock(&tmp->resv->lock, NULL);
reservation_object_lock(tmp->resv, NULL);
reservation_object_add_excl_fence(tmp->resv, NULL);
ww_mutex_unlock(&tmp->resv->lock);
reservation_object_unlock(tmp->resv);
obj = tmp;
break;
......
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "i915_drv.h"
#include "intel_frontbuffer.h"
#include "i915_gem_clflush.h"
static DEFINE_SPINLOCK(clflush_lock);
static u64 clflush_context;
/*
 * State for one deferred clflush of a GEM object: the fence published on
 * the object's reservation, the sw fence it waits upon, the work item that
 * performs the flush, and the object being flushed.
 */
struct clflush {
struct dma_fence dma; /* Must be first for dma_fence_free(); i915_clflush_release() asserts offset 0 */
/* Initialised with i915_clflush_notify() as its notification callback. */
struct i915_sw_fence wait;
/* Initialised to run i915_clflush_work(); scheduled from the notify callback. */
struct work_struct work;
/* Reference taken in i915_gem_clflush_object(), dropped by the worker. */
struct drm_i915_gem_object *obj;
};
/* dma_fence_ops.get_driver_name: every clflush fence reports DRIVER_NAME. */
static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
{
	return DRIVER_NAME;
}
/* dma_fence_ops.get_timeline_name: all clflush fences share one fixed name. */
static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
{
	return "clflush";
}
/*
 * dma_fence_ops.enable_signaling: nothing to arm here, the callback
 * unconditionally reports success.
 */
static bool i915_clflush_enable_signaling(struct dma_fence *fence)
{
	return true;
}
static void i915_clflush_release(struct dma_fence *fence)
{
struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
i915_sw_fence_fini(&clflush->wait);
BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
dma_fence_free(&clflush->dma);
}
/*
 * dma_fence callbacks for clflush fences. Naming, signaling and release use
 * the i915_clflush_*() helpers in this file; waiting uses
 * dma_fence_default_wait.
 */
static const struct dma_fence_ops i915_clflush_ops = {
.get_driver_name = i915_clflush_get_driver_name,
.get_timeline_name = i915_clflush_get_timeline_name,
.enable_signaling = i915_clflush_enable_signaling,
.wait = dma_fence_default_wait,
.release = i915_clflush_release,
};
/*
 * Perform the flush itself: clflush the object's page list, clear its
 * cache_dirty marker, then issue a CPU-origin frontbuffer flush for it.
 * Reads obj->mm.pages directly, so callers must ensure the pages exist.
 */
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
{
	drm_clflush_sg(obj->mm.pages);
	obj->cache_dirty = false;

	intel_fb_obj_flush(obj, ORIGIN_CPU);
}
static void i915_clflush_work(struct work_struct *work)
{
struct clflush *clflush = container_of(work, typeof(*clflush), work);
struct drm_i915_gem_object *obj = clflush->obj;
if (!obj->cache_dirty)
goto out;
if (i915_gem_object_pin_pages(obj)) {
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
goto out;
}
__i915_do_clflush(obj);
i915_gem_object_unpin_pages(obj);
out:
i915_gem_object_put(obj);
dma_fence_signal(&clflush->dma);
dma_fence_put(&clflush->dma);
}
/*
 * Notification callback of the clflush's sw fence.
 *
 * FENCE_COMPLETE schedules the flush worker; FENCE_FREE drops a reference
 * on the dma fence. Always returns NOTIFY_DONE.
 */
static int __i915_sw_fence_call
i915_clflush_notify(struct i915_sw_fence *fence,
		    enum i915_sw_fence_notify state)
{
	struct clflush *flush = container_of(fence, struct clflush, wait);

	if (state == FENCE_COMPLETE)
		schedule_work(&flush->work);
	else if (state == FENCE_FREE)
		dma_fence_put(&flush->dma);

	return NOTIFY_DONE;
}
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags)
{
struct clflush *clflush;
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
* Similarly, we only access struct pages through the CPU cache, so
* anything not backed by physical memory we consider to be always
* coherent and not need clflushing.
*/
if (!i915_gem_object_has_struct_page(obj))
return;
obj->cache_dirty = true;
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
* the caches are only snooped when the render cache is
* flushed/invalidated. As we always have to emit invalidations
* and flushes when moving into and out of the RENDER domain, correct
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
return;
trace_i915_gem_object_clflush(obj);
clflush = NULL;
if (!(flags & I915_CLFLUSH_SYNC))
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
if (clflush) {
dma_fence_init(&clflush->dma,
&i915_clflush_ops,
&clflush_lock,
clflush_context,
0);
i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
clflush->obj = i915_gem_object_get(obj);
INIT_WORK(&clflush->work, i915_clflush_work);
dma_fence_get(&clflush->dma);
i915_sw_fence_await_reservation(&clflush->wait,
obj->resv, NULL,
false, I915_FENCE_TIMEOUT,
GFP_KERNEL);
reservation_object_lock(obj->resv, NULL);
reservation_object_add_excl_fence(obj->resv, &clflush->dma);
reservation_object_unlock(obj->resv);
i915_sw_fence_commit(&clflush->wait);
} else if (obj->mm.pages) {
__i915_do_clflush(obj);
} else {
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
}
}
/*
 * Allocate the single dma-fence context that every clflush fence is
 * initialised with. @i915 is not used by this function.
 */
void i915_gem_clflush_init(struct drm_i915_private *i915)
{
	clflush_context = dma_fence_context_alloc(1);
}
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_GEM_CLFLUSH_H__
#define __I915_GEM_CLFLUSH_H__
/* Forward declarations: only pointers to these types are used here. */
struct drm_i915_private;
struct drm_i915_gem_object;
/* Allocates the dma-fence context used for clflush fences. */
void i915_gem_clflush_init(struct drm_i915_private *i915);
/* Flushes the CPU cache for @obj; @flags is a mask of I915_CLFLUSH_* below. */
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags);
/* Flush even if the object is reported as coherent. */
#define I915_CLFLUSH_FORCE BIT(0)
/* Do not defer to a worker; flush before returning. */
#define I915_CLFLUSH_SYNC BIT(1)
#endif /* __I915_GEM_CLFLUSH_H__ */
......@@ -92,21 +92,6 @@
/*
 * Bitmask with one bit set for each L3 slice of @dev. The whole expansion
 * is parenthesized so the macro can be used safely inside larger
 * expressions (e.g. with ~, * or &).
 */
#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)
/* This is a HW constraint. The value below is the largest known requirement
* I've seen in a spec to date, and that was a workaround for a non-shipping
* part. It should be safe to decrease this, but it's more future proof as is.
*/
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT
static size_t get_context_alignment(struct drm_i915_private *dev_priv)
{
if (IS_GEN6(dev_priv))
return GEN6_CONTEXT_ALIGN;
return GEN7_CONTEXT_ALIGN;
}
static int get_context_size(struct drm_i915_private *dev_priv)
{
int ret;
......@@ -236,6 +221,30 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
return 0;
}
/*
 * Build the default context descriptor template.
 *
 * @i915:  device, consulted for the gen8-only L3/LLC coherency bit
 * @ppgtt: address space for the context, may be NULL
 *
 * The template always carries the VALID and PRIVILEGE bits. The addressing
 * mode is the legacy 64b mode when @ppgtt is present and 48bit, otherwise
 * the legacy 32b mode.
 */
static u32 default_desc_template(const struct drm_i915_private *i915,
				 const struct i915_hw_ppgtt *ppgtt)
{
	u32 address_mode = (ppgtt && i915_vm_is_48bit(&ppgtt->base)) ?
		INTEL_LEGACY_64B_CONTEXT : INTEL_LEGACY_32B_CONTEXT;
	u32 desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;

	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (IS_GEN8(i915))
		desc |= GEN8_CTX_L3LLC_COHERENT;

	/* TODO: WaDisableLiteRestore when we start using semaphore
	 * signalling between Command Streamers
	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
	 */

	return desc;
}
static struct i915_gem_context *
__create_hw_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
......@@ -257,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->context_list);
ctx->i915 = dev_priv;
ctx->ggtt_alignment = get_context_alignment(dev_priv);
if (dev_priv->hw_context_size) {
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
......@@ -309,8 +316,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
i915_gem_context_set_bannable(ctx);
ctx->ring_size = 4 * PAGE_SIZE;
ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
ctx->desc_template =
default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
......@@ -332,6 +339,13 @@ __create_hw_context(struct drm_i915_private *dev_priv,
return ERR_PTR(ret);
}
/*
 * Undo the creation of a hw context: drop its user handle from the
 * file's context idr, then close the context.
 */
static void __destroy_hw_context(struct i915_gem_context *ctx,
				 struct drm_i915_file_private *file_priv)
{
	idr_remove(&file_priv->context_idr, ctx->user_handle);

	context_close(ctx);
}
/**
* The default context needs to exist per ring that uses contexts. It stores the
* context state of the GPU for applications that don't utilize HW contexts, as
......@@ -356,12 +370,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
idr_remove(&file_priv->context_idr, ctx->user_handle);
context_close(ctx);
__destroy_hw_context(ctx, file_priv);
return ERR_CAST(ppgtt);
}
ctx->ppgtt = ppgtt;
ctx->desc_template = default_desc_template(dev_priv, ppgtt);
}
trace_i915_context_create(ctx);
......@@ -400,6 +414,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
i915_gem_context_set_closed(ctx); /* not user accessible */
i915_gem_context_clear_bannable(ctx);
i915_gem_context_set_force_single_submission(ctx);
if (!i915.enable_guc_submission)
ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
......@@ -451,6 +466,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
return PTR_ERR(ctx);
}
/* For easy recognisablity, we want the kernel context to be 0 and then
* all user contexts will have non-zero hw_id.
*/
GEM_BUG_ON(ctx->hw_id);
i915_gem_context_clear_bannable(ctx);
ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
dev_priv->kernel_context = ctx;
......@@ -560,27 +580,15 @@ static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
struct drm_i915_private *dev_priv = req->i915;
struct intel_ring *ring = req->ring;
struct intel_engine_cs *engine = req->engine;
enum intel_engine_id id;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len, ret;
/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
* invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
* explicitly, so we rely on the value at ring init, stored in
* itlb_before_ctx_switch.
*/
if (IS_GEN6(dev_priv)) {
ret = engine->emit_flush(req, EMIT_INVALIDATE);
if (ret)
return ret;
}
int len;
/* These flags are for resource streamer on HSW+ */
if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
......@@ -593,99 +601,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
if (INTEL_GEN(dev_priv) >= 7)
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
ret = intel_ring_begin(req, len);
if (ret)
return ret;
cs = intel_ring_begin(req, len);
if (IS_ERR(cs))
return PTR_ERR(cs);
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_GEN(dev_priv) >= 7) {
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
if (num_rings) {
struct intel_engine_cs *signaller;
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, dev_priv, id) {
if (signaller == engine)
continue;
intel_ring_emit_reg(ring,
*cs++ = i915_mmio_reg_offset(
RING_PSMI_CTL(signaller->mmio_base));
intel_ring_emit(ring,
_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
*cs++ = _MASKED_BIT_ENABLE(
GEN6_PSMI_SLEEP_MSG_DISABLE);
}
}
}
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring,
i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
*/
intel_ring_emit(ring, MI_NOOP);
*cs++ = MI_NOOP;
if (INTEL_GEN(dev_priv) >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, dev_priv, id) {
if (signaller == engine)
continue;
last_reg = RING_PSMI_CTL(signaller->mmio_base);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring,
_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
*cs++ = i915_mmio_reg_offset(last_reg);
*cs++ = _MASKED_BIT_DISABLE(
GEN6_PSMI_SLEEP_MSG_DISABLE);
}
/* Insert a delay before the next switch! */
intel_ring_emit(ring,
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring,
i915_ggtt_offset(engine->scratch));
intel_ring_emit(ring, MI_NOOP);
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
*cs++ = i915_ggtt_offset(engine->scratch);
*cs++ = MI_NOOP;
}
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
}
intel_ring_advance(ring);
intel_ring_advance(req, cs);
return ret;
return 0;
}
/*
 * Emit the L3 remapping registers for @slice into the ring of @req.
 *
 * Returns 0 when the slice has no remap information or once the commands
 * have been written, otherwise the error encoded in the pointer returned by
 * intel_ring_begin().
 */
static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	/*
	 * Space for the LRI header, one (offset, value) pair per register
	 * and the trailing MI_NOOP.
	 */
	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
......@@ -1014,8 +1015,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
return PTR_ERR(ctx);
}
idr_remove(&file_priv->context_idr, ctx->user_handle);
context_close(ctx);
__destroy_hw_context(ctx, file_priv);
mutex_unlock(&dev->struct_mutex);
DRM_DEBUG("HW context %d destroyed\n", args->ctx_id);
......@@ -1164,3 +1164,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
#include "selftests/i915_gem_context.c"
#endif
......@@ -140,8 +140,6 @@ struct i915_gem_context {
*/
int priority;
/** ggtt_alignment: alignment restriction for context objects */
u32 ggtt_alignment;
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
......
......@@ -307,3 +307,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
return ERR_PTR(ret);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_dmabuf.c"
#include "selftests/i915_gem_dmabuf.c"
#endif
......@@ -258,6 +258,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
int ret = 0;
lockdep_assert_held(&vm->i915->drm.struct_mutex);
GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
trace_i915_gem_evict_node(vm, target, flags);
/* Retire before we search the active list. Although we have
......@@ -271,11 +274,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
check_color = vm->mm.color_adjust;
if (check_color) {
/* Expand search to cover neighbouring guard pages (or lack!) */
if (start > vm->start)
if (start)
start -= I915_GTT_PAGE_SIZE;
if (end < vm->start + vm->total)
/* Always look at the page afterwards to avoid the end-of-GTT */
end += I915_GTT_PAGE_SIZE;
}
GEM_BUG_ON(start >= end);
drm_mm_for_each_node_in_range(node, &vm->mm, start, end) {
/* If we find any non-objects (!vma), we cannot evict them */
......@@ -284,6 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
break;
}
GEM_BUG_ON(!node->allocated);
vma = container_of(node, typeof(*vma), node);
/* If we are using coloring to insert guard pages between
......@@ -387,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_evict.c"
#endif
......@@ -28,12 +28,14 @@
#include <linux/dma_remapping.h>
#include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
......@@ -1110,13 +1112,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
i915_gem_clflush_object(obj, 0);
obj->base.write_domain = 0;
}
ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret)
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj, false);
}
/* Unconditionally flush any chipset caches (for streaming writes). */
......@@ -1297,12 +1304,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
* handle an error right now. Worst case should be missed
* synchronisation leading to rendering corruption.
*/
ww_mutex_lock(&resv->lock, NULL);
reservation_object_lock(resv, NULL);
if (flags & EXEC_OBJECT_WRITE)
reservation_object_add_excl_fence(resv, &req->fence);
else if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &req->fence);
ww_mutex_unlock(&resv->lock);
reservation_object_unlock(resv);
}
static void
......@@ -1313,8 +1320,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
obj->base.write_domain = obj->base.pending_write_domain;
if (obj->base.write_domain)
......@@ -1325,32 +1330,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
eb_export_fence(obj, req, vma->exec_entry->flags);
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
}
/*
 * Zero the four GEN7_SO_WRITE_OFFSET registers from within @req.
 *
 * Only accepted for gen7 on the render engine (RCS); any other combination
 * returns -EINVAL. Otherwise returns 0, or the error encoded in the pointer
 * returned by intel_ring_begin().
 */
static int
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
	u32 *cs;
	int i;

	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}

	/* Three dwords (LRI header, register offset, value) per register. */
	cs = intel_ring_begin(req, 4 * 3);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0; i < 4; i++) {
		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
		*cs++ = 0;
	}
	intel_ring_advance(req, cs);

	return 0;
}
......@@ -1403,15 +1407,20 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
return vma;
}
static void
add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file)
{
req->file_priv = file->driver_priv;
list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
}
static int
execbuf_submit(struct i915_execbuffer_params *params,
struct drm_i915_gem_execbuffer2 *args,
struct list_head *vmas)
{
struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
u32 instp_mask;
int ret;
ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
......@@ -1422,56 +1431,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
if (ret)
return ret;
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
instp_mask = I915_EXEC_CONSTANTS_MASK;
switch (instp_mode) {
case I915_EXEC_CONSTANTS_REL_GENERAL:
case I915_EXEC_CONSTANTS_ABSOLUTE:
case I915_EXEC_CONSTANTS_REL_SURFACE:
if (instp_mode != 0 && params->engine->id != RCS) {
DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
if (args->flags & I915_EXEC_CONSTANTS_MASK) {
DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
return -EINVAL;
}
if (instp_mode != dev_priv->relative_constants_mode) {
if (INTEL_INFO(dev_priv)->gen < 4) {
DRM_DEBUG("no rel constants on pre-gen4\n");
return -EINVAL;
}
if (INTEL_INFO(dev_priv)->gen > 5 &&
instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
return -EINVAL;
}
/* The HW changed the meaning on this bit on gen6 */
if (INTEL_INFO(dev_priv)->gen >= 6)
instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
}
break;
default:
DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
return -EINVAL;
}
if (params->engine->id == RCS &&
instp_mode != dev_priv->relative_constants_mode) {
struct intel_ring *ring = params->request->ring;
ret = intel_ring_begin(params->request, 4);
if (ret)
return ret;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, INSTPM);
intel_ring_emit(ring, instp_mask << 16 | instp_mode);
intel_ring_advance(ring);
dev_priv->relative_constants_mode = instp_mode;
}
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(params->request);
if (ret)
......@@ -1491,8 +1455,6 @@ execbuf_submit(struct i915_execbuffer_params *params,
if (ret)
return ret;
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
i915_gem_execbuffer_move_to_active(vmas, params->request);
return 0;
......@@ -1591,6 +1553,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_execbuffer_params *params = &params_master;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 dispatch_flags;
struct dma_fence *in_fence = NULL;
struct sync_file *out_fence = NULL;
int out_fence_fd = -1;
int ret;
bool need_relocs;
......@@ -1634,6 +1599,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
dispatch_flags |= I915_DISPATCH_RS;
}
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
return -EINVAL;
}
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
ret = out_fence_fd;
goto err_in_fence;
}
}
/* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged
......@@ -1778,6 +1757,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin;
}
if (in_fence) {
ret = i915_gem_request_await_dma_fence(params->request,
in_fence);
if (ret < 0)
goto err_request;
}
if (out_fence_fd != -1) {
out_fence = sync_file_create(&params->request->fence);
if (!out_fence) {
ret = -ENOMEM;
goto err_request;
}
}
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the batch_obj be moved onto the
......@@ -1786,10 +1780,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
*/
params->request->batch = params->batch;
ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
/*
* Save assorted stuff away to pass through to *_submission().
* NB: This data should be 'persistent' and not local as it will
......@@ -1802,9 +1792,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->dispatch_flags = dispatch_flags;
params->ctx = ctx;
trace_i915_gem_request_queue(params->request, dispatch_flags);
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
__i915_add_request(params->request, ret == 0);
add_to_client(params->request, file);
if (out_fence) {
if (ret == 0) {
fd_install(out_fence_fd, out_fence->file);
args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
args->rsvd2 |= (u64)out_fence_fd << 32;
out_fence_fd = -1;
} else {
fput(out_fence->file);
}
}
err_batch_unpin:
/*
......@@ -1826,6 +1830,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* intel_gpu_busy should also get a ref, so it will free when the device
* is really idle. */
intel_runtime_pm_put(dev_priv);
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
err_in_fence:
dma_fence_put(in_fence);
return ret;
}
......@@ -1933,11 +1941,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL;
}
if (args->rsvd2 != 0) {
DRM_DEBUG("dirty rvsd2 field\n");
return -EINVAL;
}
exec2_list = drm_malloc_gfp(args->buffer_count,
sizeof(*exec2_list),
GFP_TEMPORARY);
......
This diff is collapsed.
This diff is collapsed.
......@@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st)
{
struct scatterlist *sg;
for (sg = st->sgl; sg; sg = __sg_next(sg))
for (sg = st->sgl; sg; sg = __sg_next(sg)) {
if (sg_page(sg))
__free_pages(sg_page(sg), get_order(sg->length));
}
sg_free_table(st);
kfree(st);
......@@ -133,6 +135,7 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
return st;
err:
sg_set_page(sg, NULL, 0, 0);
sg_mark_end(sg);
internal_free_pages(st);
return ERR_PTR(-ENOMEM);
......
......@@ -33,6 +33,8 @@
#include <drm/i915_drm.h>
#include "i915_selftest.h"
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
......@@ -84,6 +86,7 @@ struct drm_i915_gem_object {
struct list_head obj_exec_link;
struct list_head batch_pool_link;
I915_SELFTEST_DECLARE(struct list_head st_link);
unsigned long flags;
......@@ -162,11 +165,12 @@ struct drm_i915_gem_object {
struct reservation_object *resv;
/** References from framebuffers, locks out tiling changes. */
unsigned long framebuffer_references;
unsigned int framebuffer_references;
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
union {
struct i915_gem_userptr {
uintptr_t ptr;
unsigned read_only :1;
......@@ -176,6 +180,9 @@ struct drm_i915_gem_object {
struct work_struct *work;
} userptr;
unsigned long scratch;
};
/** for phys allocated objects */
struct drm_dma_handle *phys_handle;
......@@ -253,6 +260,16 @@ extern void drm_gem_object_unreference(struct drm_gem_object *);
__deprecated
extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
/*
 * Lock @obj by taking the lock of its reservation object (obj->resv).
 * NULL is passed as the second argument of reservation_object_lock().
 */
static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
{
	struct reservation_object *resv = obj->resv;

	reservation_object_lock(resv, NULL);
}
/* Release the lock of @obj's reservation object (obj->resv). */
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
{
	struct reservation_object *resv = obj->resv;

	reservation_object_unlock(resv);
}
static inline bool
i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
{
......@@ -299,6 +316,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
/*
 * Report whether @obj currently has any framebuffer references. The counter
 * is sampled once with READ_ONCE(); a non-zero count yields true.
 */
static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
	return READ_ONCE(obj->framebuffer_references) != 0;
}
static inline unsigned int
i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
{
......@@ -357,5 +380,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
This diff is collapsed.
......@@ -32,10 +32,12 @@
struct drm_file;
struct drm_i915_gem_object;
struct drm_i915_gem_request;
struct intel_wait {
struct rb_node node;
struct task_struct *tsk;
struct drm_i915_gem_request *request;
u32 seqno;
};
......@@ -119,18 +121,10 @@ struct drm_i915_gem_request {
* The submit fence is used to await upon all of the request's
* dependencies. When it is signaled, the request is ready to run.
* It is used by the driver to then queue the request for execution.
*
* The execute fence is used to signal when the request has been
* sent to hardware.
*
* It is illegal for the submit fence of one request to wait upon the
* execute fence of an earlier request. It should be sufficient to
* wait upon the submit fence of the earlier request.
*/
struct i915_sw_fence submit;
struct i915_sw_fence execute;
wait_queue_t submitq;
wait_queue_t execq;
wait_queue_head_t execute;
/* A list of everyone we wait upon, and everyone who waits upon us.
* Even though we will not be submitted to the hardware before the
......@@ -143,13 +137,12 @@ struct drm_i915_gem_request {
struct i915_priotree priotree;
struct i915_dependency dep;
u32 global_seqno;
/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
/** GEM sequence number associated with this request on the
* global execution timeline. It is zero when the request is not
* on the HW queue (i.e. not on the engine timeline list).
* Its value is guarded by the timeline spinlock.
*/
u32 previous_seqno;
u32 global_seqno;
/** Position in the ring of the start of the request */
u32 head;
......@@ -187,7 +180,7 @@ struct drm_i915_gem_request {
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_list;
struct list_head client_link;
};
extern const struct dma_fence_ops i915_fence_ops;
......@@ -200,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline struct drm_i915_gem_request *
......@@ -243,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
/**
 * i915_gem_request_global_seqno - report the current global seqno
 * @request: the request
 *
 * A request is assigned a global seqno only when it is on the hardware
 * execution queue. The global seqno can be used to maintain a list of
 * requests on the same engine in retirement order, for example for
 * constructing a priority queue for waiting. Prior to its execution, or
 * if it is subsequently removed in the event of preemption, its global
 * seqno is zero. As both insertion and removal from the execution queue
 * may operate in IRQ context, it is not guarded by the usual struct_mutex
 * BKL. Instead those relying on the global seqno must be prepared for its
 * value to change between reads. Only when the request is complete can
 * the global seqno be stable (due to the memory barriers on submitting
 * the commands to the hardware to write the breadcrumb, if the HWS shows
 * that it has passed the global seqno and the global seqno is unchanged
 * after the read, it is indeed complete).
 *
 * Return: the value of @request->global_seqno as sampled by a single
 * READ_ONCE(); zero means no global seqno is currently assigned.
 */
static inline u32
i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
{
	return READ_ONCE(request->global_seqno);
}
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
......@@ -259,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
void __i915_gem_request_submit(struct drm_i915_gem_request *request);
void i915_gem_request_submit(struct drm_i915_gem_request *request);
void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
......@@ -283,46 +301,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
}
static inline bool
__i915_gem_request_started(const struct drm_i915_gem_request *req)
__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
{
GEM_BUG_ON(!req->global_seqno);
GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->previous_seqno);
seqno - 1);
}
static inline bool
i915_gem_request_started(const struct drm_i915_gem_request *req)
{
if (!req->global_seqno)
u32 seqno;
seqno = i915_gem_request_global_seqno(req);
if (!seqno)
return false;
return __i915_gem_request_started(req);
return __i915_gem_request_started(req, seqno);
}
static inline bool
__i915_gem_request_completed(const struct drm_i915_gem_request *req)
__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
{
GEM_BUG_ON(!req->global_seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->global_seqno);
GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
seqno == i915_gem_request_global_seqno(req);
}
static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
if (!req->global_seqno)
u32 seqno;
seqno = i915_gem_request_global_seqno(req);
if (!seqno)
return false;
return __i915_gem_request_completed(req);
return __i915_gem_request_completed(req, seqno);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us);
u32 seqno, int state, unsigned long timeout_us);
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us)
{
return (__i915_gem_request_started(request) &&
__i915_spin_request(request, state, timeout_us));
u32 seqno;
seqno = i915_gem_request_global_seqno(request);
return (__i915_gem_request_started(request, seqno) &&
__i915_spin_request(request, seqno, state, timeout_us));
}
/* We treat requests as fences. This is not to be confused with our
......
......@@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
if (!(flags & I915_SHRINK_ACTIVE) &&
(i915_gem_object_is_active(obj) ||
obj->framebuffer_references))
i915_gem_object_is_framebuffer(obj)))
continue;
if (!can_release_pages(obj))
......@@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
{
unsigned long freed;
intel_runtime_pm_get(dev_priv);
freed = i915_gem_shrink(dev_priv, -1UL,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_ACTIVE);
intel_runtime_pm_put(dev_priv);
rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
return freed;
......@@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
return NOTIFY_DONE;
intel_runtime_pm_get(dev_priv);
freed_pages = i915_gem_shrink_all(dev_priv);
intel_runtime_pm_put(dev_priv);
/* Because we may be allocating inside our own driver, we cannot
* assert that there are no objects with pinned pages that are not
......
......@@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
mutex_unlock(&dev_priv->mm.stolen_lock);
}
static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = dev_priv->drm.pdev;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct resource *r;
u32 base;
dma_addr_t base;
/* Almost universally we can find the Graphics Base of Stolen Memory
* at register BSM (0x5c) in the igfx configuration space. On a few
......@@ -189,14 +189,14 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
base = tom - tseg_size - ggtt->stolen_size;
}
if (base == 0)
if (base == 0 || add_overflows(base, ggtt->stolen_size))
return 0;
/* make sure we don't clobber the GTT if it's within stolen memory */
if (INTEL_GEN(dev_priv) <= 4 &&
!IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
struct {
u32 start, end;
dma_addr_t start, end;
} stolen[2] = {
{ .start = base, .end = base + ggtt->stolen_size, },
{ .start = base, .end = base + ggtt->stolen_size, },
......@@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
if (stolen[0].start != stolen[1].start ||
stolen[0].end != stolen[1].end) {
dma_addr_t end = base + ggtt->stolen_size - 1;
DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n",
(unsigned long long)ggtt_start,
(unsigned long long)ggtt_end - 1);
DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n",
base, base + (u32)ggtt->stolen_size - 1);
DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n",
&base, &end);
}
}
......@@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
* range. Apparently this works.
*/
if (r == NULL && !IS_GEN3(dev_priv)) {
DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n",
base, base + (uint32_t)ggtt->stolen_size);
dma_addr_t end = base + ggtt->stolen_size;
DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n",
&base, &end);
base = 0;
}
}
......@@ -281,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev)
}
static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ?
CTG_STOLEN_RESERVED :
ELK_STOLEN_RESERVED);
phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
......@@ -304,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -330,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -350,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -376,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
phys_addr_t stolen_top;
dma_addr_t stolen_top;
stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
......@@ -399,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
phys_addr_t reserved_base, stolen_top;
dma_addr_t reserved_base, stolen_top;
u32 reserved_total, reserved_size;
u32 stolen_usable_start;
......@@ -420,7 +424,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
if (ggtt->stolen_size == 0)
return 0;
dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv);
dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv);
if (dev_priv->mm.stolen_base == 0)
return 0;
......@@ -469,8 +473,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
if (reserved_base < dev_priv->mm.stolen_base ||
reserved_base + reserved_size > stolen_top) {
phys_addr_t reserved_top = reserved_base + reserved_size;
DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n",
dma_addr_t reserved_top = reserved_base + reserved_size;
DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n",
&reserved_base, &reserved_top,
&dev_priv->mm.stolen_base, &stolen_top);
return 0;
......
......@@ -158,14 +158,9 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
if (stride > 8192)
return false;
if (IS_GEN3(i915)) {
if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20)
return false;
} else {
if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19)
if (!is_power_of_2(stride))
return false;
}
}
if (IS_GEN2(i915) ||
(tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
......@@ -176,12 +171,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
if (!stride || !IS_ALIGNED(stride, tile_width))
return false;
/* 965+ just needs multiples of tile width */
if (INTEL_GEN(i915) >= 4)
return true;
/* Pre-965 needs power of two tile widths */
return is_power_of_2(stride);
}
static bool i915_vma_fence_prepare(struct i915_vma *vma,
......@@ -248,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
if ((tiling | stride) == obj->tiling_and_stride)
return 0;
if (obj->framebuffer_references)
if (i915_gem_object_is_framebuffer(obj))
return -EBUSY;
/* We need to rebind the object if its current allocation
......@@ -268,6 +258,12 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
if (err)
return err;
i915_gem_object_lock(obj);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;
}
/* If the memory has unknown (i.e. varying) swizzling, we pin the
* pages to prevent them being swapped out and causing corruption
* due to the change in swizzling.
......@@ -304,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
}
obj->tiling_and_stride = tiling | stride;
i915_gem_object_unlock(obj);
/* Force the fence to be reacquired for GTT access */
i915_gem_release_mmap(obj);
......
......@@ -33,7 +33,13 @@ struct i915_gem_timeline;
struct intel_timeline {
u64 fence_context;
u32 last_submitted_seqno;
u32 seqno;
/**
* Count of outstanding requests, from the time they are constructed
* to the moment they are retired. Loosely coupled to hardware.
*/
u32 inflight_seqnos;
spinlock_t lock;
......@@ -56,7 +62,6 @@ struct intel_timeline {
struct i915_gem_timeline {
struct list_head link;
atomic_t seqno;
struct drm_i915_private *i915;
const char *name;
......
This diff is collapsed.
......@@ -348,7 +348,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
u32 freespace;
int ret;
spin_lock(&client->wq_lock);
spin_lock_irq(&client->wq_lock);
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
freespace -= client->wq_rsvd;
if (likely(freespace >= wqi_size)) {
......@@ -358,21 +358,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
client->no_wq_space++;
ret = -EAGAIN;
}
spin_unlock(&client->wq_lock);
spin_unlock_irq(&client->wq_lock);
return ret;
}
/*
 * Adjust the amount of work-queue space reserved on @client by @size.
 * Callers pass a positive value to reserve and a negative value to release.
 * The update is made under client->wq_lock using the irqsave/irqrestore
 * spinlock variants.
 */
static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
{
	unsigned long irqflags;

	spin_lock_irqsave(&client->wq_lock, irqflags);
	client->wq_rsvd += size;
	spin_unlock_irqrestore(&client->wq_lock, irqflags);
}
/*
 * Give back the work-queue space for one work item that was previously
 * reserved on the execbuf client for @request.
 */
void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
{
	/* Signed, so that -wqi_size below is an ordinary negative delta. */
	const int wqi_size = sizeof(struct guc_wq_item);
	struct i915_guc_client *client = request->i915->guc.execbuf_client;

	/* There must be at least one item's worth of reservation to return. */
	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
	guc_client_update_wq_rsvd(client, -wqi_size);
}
/* Construct a Work Item and append it to the GuC's Work Queue */
......@@ -509,15 +515,18 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
unsigned int engine_id = engine->id;
struct intel_guc *guc = &rq->i915->guc;
struct i915_guc_client *client = guc->execbuf_client;
unsigned long flags;
int b_ret;
spin_lock(&client->wq_lock);
guc_wq_item_append(client, rq);
/* WA to flush out the pending GMADR writes to ring buffer. */
if (i915_vma_is_map_and_fenceable(rq->ring->vma))
POSTING_READ_FW(GUC_STATUS);
trace_i915_gem_request_in(rq, 0);
spin_lock_irqsave(&client->wq_lock, flags);
guc_wq_item_append(client, rq);
b_ret = guc_ring_doorbell(client);
client->submissions[engine_id] += 1;
......@@ -527,7 +536,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
guc->submissions[engine_id] += 1;
guc->last_seqno[engine_id] = rq->global_seqno;
spin_unlock(&client->wq_lock);
spin_unlock_irqrestore(&client->wq_lock, flags);
}
static void i915_guc_submit(struct drm_i915_gem_request *rq)
......@@ -943,16 +953,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
/* Take over from manual control of ELSP (execlists) */
for_each_engine(engine, dev_priv, id) {
const int wqi_size = sizeof(struct guc_wq_item);
struct drm_i915_gem_request *rq;
engine->submit_request = i915_guc_submit;
engine->schedule = NULL;
/* Replay the current set of previously submitted requests */
spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link) {
client->wq_rsvd += sizeof(struct guc_wq_item);
guc_client_update_wq_rsvd(client, wqi_size);
__i915_guc_submit(rq);
}
spin_unlock_irq(&engine->timeline->lock);
}
return 0;
......
This diff is collapsed.
......@@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR "
"(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) "
"Default: -1 (use per-chip default)");
/*
 * Register the alpha_support parameter exactly once; a second registration
 * under the same name with a different type cannot coexist with this one.
 * NOTE(review): assumes i915.alpha_support is declared as bool - confirm
 * against the i915 params structure.
 */
module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400);
MODULE_PARM_DESC(alpha_support,
	"Enable alpha quality driver support for latest hardware. "
	"See also CONFIG_DRM_I915_ALPHA_SUPPORT.");
......@@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600);
MODULE_PARM_DESC(verbose_state_checks,
"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
/*
 * Register nuclear_pageflip exactly once, with mode 0400, and give it a
 * single description string.
 */
module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400);
MODULE_PARM_DESC(nuclear_pageflip,
	"Force enable atomic functionality on platforms that don't have full support yet.");
/* WA to get away with the default setting in VBT for early platforms.Will be removed */
module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400);
......
This diff is collapsed.
This diff is collapsed.
......@@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
{
assert_spin_locked(&dev_priv->perf.hook_lock);
lockdep_assert_held(&dev_priv->perf.hook_lock);
if (dev_priv->perf.oa.exclusive_stream->enabled) {
struct i915_gem_context *ctx =
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -25,6 +25,17 @@
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
/*
 * add_overflows(A, B) - evaluates to true if computing A + B overflows.
 *
 * When GCC_VERSION is 70000 or higher this uses __builtin_add_overflow_p()
 * against the type of (A) + (B). Otherwise it falls back to checking whether
 * the sum ended up below the first operand, which is only meaningful for
 * unsigned arithmetic that wraps.
 *
 * The fallback's temporaries carry a trailing "__" so that an argument which
 * happens to be named a or b is not shadowed by the macro's own locals.
 */
#if GCC_VERSION >= 70000
#define add_overflows(A, B) \
	__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
#else
#define add_overflows(A, B) ({ \
	typeof(A) a__ = (A); \
	typeof(B) b__ = (B); \
	a__ + b__ < a__; \
})
#endif
#define range_overflows(start, size, max) ({ \
typeof(start) start__ = (start); \
typeof(size) size__ = (size); \
......
This diff is collapsed.
This diff is collapsed.
......@@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma,
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
u32 flags);
bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
bool
i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment