Commit fc93ff60 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel into drm-next

- refactor ddi buffer programming a bit (Ville)
- large-scale renaming to untangle naming in the gem code (Chris)
- rework vma/active tracking for accurately reaping idle mappings of shared
  objects (Chris)
- misc dp sst/mst probing corner case fixes (Ville)
- tons of cleanup&tunings all around in gem
- lockless (rcu-protected) request lookup, plus use it everywhere for
  non(b)locking waits (Chris)
- pipe crc debugfs fixes (Rodrigo)
- random fixes all over

* tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel: (222 commits)
  drm/i915: Update DRIVER_DATE to 20160808
  drm/i915: fix aliasing_ppgtt leak
  drm/i915: Update comment before i915_spin_request
  drm/i915: Use drm official vblank_no_hw_counter callback.
  drm/i915: Fix copy_to_user usage for pipe_crc
  Revert "drm/i915: Track active streams also for DP SST"
  drm/i915: fix WaInsertDummyPushConstPs
  drm/i915: Assert that the request hasn't been retired
  drm/i915: Repack fence tiling mode and stride into a single integer
  drm/i915: Document and reject invalid tiling modes
  drm/i915: Remove locking for get_tiling
  drm/i915: Remove pinned check from madvise ioctl
  drm/i915: Reduce locking inside swfinish ioctl
  drm/i915: Remove (struct_mutex) locking for busy-ioctl
  drm/i915: Remove (struct_mutex) locking for wait-ioctl
  drm/i915: Do a nonblocking wait first in pread/pwrite
  drm/i915: Remove unused no-shrinker-steal
  drm/i915: Tidy generation of the GTT mmap offset
  drm/i915/shrinker: Wait before acquiring struct_mutex under oom
  drm/i915: Simplify do_idling() (Ironlake vt-d w/a)
  ...
parents f8725ad1 c5b7e97b
......@@ -70,6 +70,9 @@ Frontbuffer Tracking
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
:doc: frontbuffer tracking
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
:internal:
......
......@@ -25,7 +25,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
i915-y += i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_context.o \
i915_gem_debug.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
i915_gem_execbuffer.o \
......@@ -33,6 +32,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
i915_gem_stolen.o \
i915_gem_tiling.o \
......@@ -40,6 +40,7 @@ i915-y += i915_cmd_parser.o \
i915_gpu_error.o \
i915_trace_points.o \
intel_breadcrumbs.o \
intel_engine_cs.o \
intel_lrc.o \
intel_mocs.o \
intel_ringbuffer.o \
......
......@@ -62,23 +62,23 @@
* The parser always rejects such commands.
*
* The majority of the problematic commands fall in the MI_* range, with only a
* few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW).
* few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW).
*
* Implementation:
* Each ring maintains tables of commands and registers which the parser uses in
* scanning batch buffers submitted to that ring.
* Each engine maintains tables of commands and registers which the parser
* uses in scanning batch buffers submitted to that engine.
*
* Since the set of commands that the parser must check for is significantly
* smaller than the number of commands supported, the parser tables contain only
* those commands required by the parser. This generally works because command
* opcode ranges have standard command length encodings. So for commands that
* the parser does not need to check, it can easily skip them. This is
* implemented via a per-ring length decoding vfunc.
* implemented via a per-engine length decoding vfunc.
*
* Unfortunately, there are a number of commands that do not follow the standard
* length encoding for their opcode range, primarily amongst the MI_* commands.
* To handle this, the parser provides a way to define explicit "skip" entries
* in the per-ring command tables.
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
* mentioned above: rejected, master-only, register whitelist. The parser
......@@ -603,7 +603,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
static bool validate_cmds_sorted(struct intel_engine_cs *engine,
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
{
......@@ -624,8 +624,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
u32 curr = desc->cmd.value & desc->cmd.mask;
if (curr < previous) {
DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
engine->id, i, j, curr, previous);
DRM_ERROR("CMD: %s [%d] command table not sorted: "
"table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
engine->name, engine->id,
i, j, curr, previous);
ret = false;
}
......@@ -636,7 +638,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
return ret;
}
static bool check_sorted(int ring_id,
static bool check_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_reg_descriptor *reg_table,
int reg_count)
{
......@@ -648,8 +650,10 @@ static bool check_sorted(int ring_id,
u32 curr = i915_mmio_reg_offset(reg_table[i].addr);
if (curr < previous) {
DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
ring_id, i, curr, previous);
DRM_ERROR("CMD: %s [%d] register table not sorted: "
"entry=%d reg=0x%08X prev=0x%08X\n",
engine->name, engine->id,
i, curr, previous);
ret = false;
}
......@@ -666,7 +670,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine)
for (i = 0; i < engine->reg_table_count; i++) {
table = &engine->reg_tables[i];
if (!check_sorted(engine->id, table->regs, table->num_regs))
if (!check_sorted(engine, table->regs, table->num_regs))
return false;
}
......@@ -736,7 +740,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
}
/**
* i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
* intel_engine_init_cmd_parser() - set cmd parser related fields for an engine
* @engine: the engine to initialize
*
* Optionally initializes fields related to batch buffer command parsing in the
......@@ -745,7 +749,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
*
* Return: non-zero if initialization fails
*/
int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
......@@ -806,8 +810,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
default:
DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n",
engine->id);
MISSING_CASE(engine->id);
BUG();
}
......@@ -829,13 +832,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
}
/**
* i915_cmd_parser_fini_ring() - clean up cmd parser related fields
* intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields
* @engine: the engine to clean up
*
* Releases any resources related to command parsing that may have been
* initialized for the specified ring.
* initialized for the specified engine.
*/
void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine)
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
if (!engine->needs_cmd_parser)
return;
......@@ -866,9 +869,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
* Returns a pointer to a descriptor for the command specified by cmd_header.
*
* The caller must supply space for a default descriptor via the default_desc
* parameter. If no descriptor for the specified command exists in the ring's
* parameter. If no descriptor for the specified command exists in the engine's
* command parser tables, this function fills in default_desc based on the
* ring's default length encoding and returns default_desc.
* engine's default length encoding and returns default_desc.
*/
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
......@@ -1023,15 +1026,16 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
}
/**
* i915_needs_cmd_parser() - should a given ring use software command parsing?
* intel_engine_needs_cmd_parser() - should a given engine use software
* command parsing?
* @engine: the engine in question
*
* Only certain platforms require software batch buffer command parsing, and
* only when enabled via module parameter.
*
* Return: true if the ring requires software command parsing
* Return: true if the engine requires software command parsing
*/
bool i915_needs_cmd_parser(struct intel_engine_cs *engine)
bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
{
if (!engine->needs_cmd_parser)
return false;
......@@ -1078,8 +1082,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
reg_addr, *cmd, engine->id);
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
reg_addr, *cmd, engine->exec_id);
return false;
}
......@@ -1159,11 +1163,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
desc->bits[i].mask;
if (dword != desc->bits[i].expected) {
DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
*cmd,
desc->bits[i].mask,
desc->bits[i].expected,
dword, engine->id);
dword, engine->exec_id);
return false;
}
}
......@@ -1189,7 +1193,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
int i915_parse_cmds(struct intel_engine_cs *engine,
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
......@@ -1295,7 +1299,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_engine(engine, dev_priv) {
if (i915_needs_cmd_parser(engine)) {
if (intel_engine_needs_cmd_parser(engine)) {
active = true;
break;
}
......
This diff is collapsed.
......@@ -228,27 +228,6 @@ static void intel_detect_pch(struct drm_device *dev)
pci_dev_put(pch);
}
bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv)
{
if (INTEL_GEN(dev_priv) < 6)
return false;
if (i915.semaphores >= 0)
return i915.semaphores;
/* TODO: make semaphores and Execlists play nicely together */
if (i915.enable_execlists)
return false;
#ifdef CONFIG_INTEL_IOMMU
/* Enable semaphores on SNB when IO remapping is off */
if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped)
return false;
#endif
return true;
}
static int i915_getparam(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
......@@ -324,7 +303,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 1;
break;
case I915_PARAM_HAS_SEMAPHORES:
value = i915_semaphore_is_enabled(dev_priv);
value = i915.semaphores;
break;
case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
value = 1;
......@@ -999,6 +978,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
i915.enable_ppgtt =
intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt);
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores));
}
/**
......@@ -1011,8 +993,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t aperture_size;
int ret;
if (i915_inject_load_failure())
......@@ -1022,16 +1002,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
intel_sanitize_options(dev_priv);
ret = i915_ggtt_init_hw(dev);
ret = i915_ggtt_probe_hw(dev_priv);
if (ret)
return ret;
ret = i915_ggtt_enable_hw(dev);
if (ret) {
DRM_ERROR("failed to enable GGTT\n");
goto out_ggtt;
}
/* WARNING: Apparently we must kick fbdev drivers before vgacon,
* otherwise the vga fbdev driver falls over. */
ret = i915_kick_out_firmware_fb(dev_priv);
......@@ -1046,6 +1020,16 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
goto out_ggtt;
}
ret = i915_ggtt_init_hw(dev_priv);
if (ret)
return ret;
ret = i915_ggtt_enable_hw(dev_priv);
if (ret) {
DRM_ERROR("failed to enable GGTT\n");
goto out_ggtt;
}
pci_set_master(dev->pdev);
/* overlay on gen2 is broken and can't address above 1G */
......@@ -1058,7 +1042,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
}
}
/* 965GM sometimes incorrectly writes to hardware status page (HWS)
* using 32bit addressing, overwriting memory if HWS is located
* above 4GB.
......@@ -1077,19 +1060,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
}
}
aperture_size = ggtt->mappable_end;
ggtt->mappable =
io_mapping_create_wc(ggtt->mappable_base,
aperture_size);
if (!ggtt->mappable) {
ret = -EIO;
goto out_ggtt;
}
ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base,
aperture_size);
pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
......@@ -1118,7 +1088,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
return 0;
out_ggtt:
i915_ggtt_cleanup_hw(dev);
i915_ggtt_cleanup_hw(dev_priv);
return ret;
}
......@@ -1130,15 +1100,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
if (dev->pdev->msi_enabled)
pci_disable_msi(dev->pdev);
pm_qos_remove_request(&dev_priv->pm_qos);
arch_phys_wc_del(ggtt->mtrr);
io_mapping_free(ggtt->mappable);
i915_ggtt_cleanup_hw(dev);
i915_ggtt_cleanup_hw(dev_priv);
}
/**
......@@ -1343,7 +1310,7 @@ void i915_driver_unload(struct drm_device *dev)
i915_destroy_error_state(dev);
/* Flush any outstanding unpin_work. */
flush_workqueue(dev_priv->wq);
drain_workqueue(dev_priv->wq);
intel_guc_fini(dev);
i915_gem_fini(dev);
......@@ -1458,8 +1425,6 @@ static int i915_drm_suspend(struct drm_device *dev)
intel_guc_suspend(dev);
intel_suspend_gt_powersave(dev_priv);
intel_display_suspend(dev);
intel_dp_mst_suspend(dev);
......@@ -1586,15 +1551,13 @@ static int i915_drm_resume(struct drm_device *dev)
disable_rpm_wakeref_asserts(dev_priv);
ret = i915_ggtt_enable_hw(dev);
ret = i915_ggtt_enable_hw(dev_priv);
if (ret)
DRM_ERROR("failed to re-enable GGTT\n");
intel_csr_ucode_resume(dev_priv);
mutex_lock(&dev->struct_mutex);
i915_gem_restore_gtt_mappings(dev);
mutex_unlock(&dev->struct_mutex);
i915_gem_resume(dev);
i915_restore_state(dev);
intel_opregion_setup(dev_priv);
......@@ -1652,6 +1615,7 @@ static int i915_drm_resume(struct drm_device *dev)
intel_opregion_notify_adapter(dev_priv, PCI_D0);
intel_autoenable_gt_powersave(dev_priv);
drm_kms_helper_poll_enable(dev);
enable_rpm_wakeref_asserts(dev_priv);
......@@ -1778,8 +1742,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
unsigned reset_counter;
int ret;
intel_reset_gt_powersave(dev_priv);
mutex_lock(&dev->struct_mutex);
/* Clear any previous failed attempts at recovery. Time to try again. */
......@@ -1835,8 +1797,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
* previous concerns that it doesn't respond well to some forms
* of re-init after reset.
*/
if (INTEL_INFO(dev)->gen > 5)
intel_enable_gt_powersave(dev_priv);
intel_autoenable_gt_powersave(dev_priv);
return 0;
......@@ -2462,7 +2423,6 @@ static int intel_runtime_resume(struct device *device)
* we can do is to hope that things will still work (and disable RPM).
*/
i915_gem_init_swizzling(dev);
gen6_update_ring_freq(dev_priv);
intel_runtime_pm_enable_interrupts(dev_priv);
......@@ -2618,6 +2578,7 @@ static struct drm_driver driver = {
.postclose = i915_driver_postclose,
.set_busid = drm_pci_set_busid,
.gem_close_object = i915_gem_close_object,
.gem_free_object = i915_gem_free_object,
.gem_vm_ops = &i915_gem_vm_ops,
......
This diff is collapsed.
This diff is collapsed.
......@@ -41,15 +41,15 @@
/**
* i915_gem_batch_pool_init() - initialize a batch buffer pool
* @dev: the drm device
* @engine: the associated request submission engine
* @pool: the batch buffer pool
*/
void i915_gem_batch_pool_init(struct drm_device *dev,
void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool)
{
int n;
pool->dev = dev;
pool->engine = engine;
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
......@@ -65,18 +65,17 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
{
int n;
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
while (!list_empty(&pool->cache_list[n])) {
struct drm_i915_gem_object *obj =
list_first_entry(&pool->cache_list[n],
struct drm_i915_gem_object,
batch_pool_link);
list_del(&obj->batch_pool_link);
drm_gem_object_unreference(&obj->base);
}
struct drm_i915_gem_object *obj, *next;
list_for_each_entry_safe(obj, next,
&pool->cache_list[n],
batch_pool_link)
i915_gem_object_put(obj);
INIT_LIST_HEAD(&pool->cache_list[n]);
}
}
......@@ -102,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
struct list_head *list;
int n;
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
/* Compute a power-of-two bucket, but throw everything greater than
* 16KiB into the same bucket: i.e. the the buckets hold objects of
......@@ -115,13 +114,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (tmp->active)
if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
&tmp->base.dev->struct_mutex))
break;
/* While we're looping, do some clean up */
if (tmp->madv == __I915_MADV_PURGED) {
list_del(&tmp->batch_pool_link);
drm_gem_object_unreference(&tmp->base);
i915_gem_object_put(tmp);
continue;
}
......@@ -134,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (obj == NULL) {
int ret;
obj = i915_gem_object_create(pool->dev, size);
obj = i915_gem_object_create(&pool->engine->i915->drm, size);
if (IS_ERR(obj))
return obj;
......
......@@ -27,13 +27,15 @@
#include "i915_drv.h"
struct intel_engine_cs;
struct i915_gem_batch_pool {
struct drm_device *dev;
struct intel_engine_cs *engine;
struct list_head cache_list[4];
};
/* i915_gem_batch_pool.c */
void i915_gem_batch_pool_init(struct drm_device *dev,
void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool);
void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
struct drm_i915_gem_object*
......
This diff is collapsed.
......@@ -23,9 +23,13 @@
* Authors:
* Dave Airlie <airlied@redhat.com>
*/
#include <linux/dma-buf.h>
#include <linux/reservation.h>
#include <drm/drmP.h>
#include "i915_drv.h"
#include <linux/dma-buf.h>
static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
{
......@@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.end_cpu_access = i915_gem_end_cpu_access,
};
static void export_fences(struct drm_i915_gem_object *obj,
struct dma_buf *dma_buf)
{
struct reservation_object *resv = dma_buf->resv;
struct drm_i915_gem_request *req;
unsigned long active;
int idx;
active = __I915_BO_ACTIVE(obj);
if (!active)
return;
/* Serialise with execbuf to prevent concurrent fence-loops */
mutex_lock(&obj->base.dev->struct_mutex);
/* Mark the object for future fences before racily adding old fences */
obj->base.dma_buf = dma_buf;
ww_mutex_lock(&resv->lock, NULL);
for_each_active(active, idx) {
req = i915_gem_active_get(&obj->last_read[idx],
&obj->base.dev->struct_mutex);
if (!req)
continue;
if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &req->fence);
i915_gem_request_put(req);
}
req = i915_gem_active_get(&obj->last_write,
&obj->base.dev->struct_mutex);
if (req) {
reservation_object_add_excl_fence(resv, &req->fence);
i915_gem_request_put(req);
}
ww_mutex_unlock(&resv->lock);
mutex_unlock(&obj->base.dev->struct_mutex);
}
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct dma_buf *dma_buf;
exp_info.ops = &i915_dmabuf_ops;
exp_info.size = gem_obj->size;
exp_info.flags = flags;
exp_info.priv = gem_obj;
if (obj->ops->dmabuf_export) {
int ret = obj->ops->dmabuf_export(obj);
if (ret)
return ERR_PTR(ret);
}
return dma_buf_export(&exp_info);
dma_buf = dma_buf_export(&exp_info);
if (IS_ERR(dma_buf))
return dma_buf;
export_fences(obj, dma_buf);
return dma_buf;
}
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
......@@ -278,8 +330,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
* Importing dmabuf exported from out own gem increases
* refcount on gem itself instead of f_count of dmabuf.
*/
drm_gem_object_reference(&obj->base);
return &obj->base;
return &i915_gem_object_get(obj)->base;
}
}
......@@ -300,6 +351,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
obj->base.import_attach = attach;
/* We use GTT as shorthand for a coherent domain, one that is
* neither in the GPU cache nor in the CPU cache, where all
* writes are immediately visible in memory. (That's not strictly
* true, but it's close! There are internal buffers such as the
* write-combined buffer or a delay through the chipset for GTT
* writes that do require us to treat GTT as a separate cache domain.)
*/
obj->base.read_domains = I915_GEM_DOMAIN_GTT;
obj->base.write_domain = 0;
return &obj->base;
fail_detach:
......
......@@ -33,41 +33,23 @@
#include "intel_drv.h"
#include "i915_trace.h"
static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
static bool
gpu_is_idle(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
if (i915.enable_execlists)
return 0;
for_each_engine(engine, dev_priv) {
struct drm_i915_gem_request *req;
int ret;
if (engine->last_context == NULL)
continue;
if (engine->last_context == dev_priv->kernel_context)
continue;
req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req))
return PTR_ERR(req);
ret = i915_switch_context(req);
i915_add_request_no_flush(req);
if (ret)
return ret;
if (intel_engine_is_active(engine))
return false;
}
return 0;
return true;
}
static bool
mark_free(struct i915_vma *vma, struct list_head *unwind)
{
if (vma->pin_count)
if (i915_vma_is_pinned(vma))
return false;
if (WARN_ON(!list_empty(&vma->exec_list)))
......@@ -79,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
/**
* i915_gem_evict_something - Evict vmas to make room for binding a new one
* @dev: drm_device
* @vm: address space to evict from
* @min_size: size of the desired free space
* @alignment: alignment constraint of the desired free space
......@@ -102,42 +83,37 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
* memory in e.g. the shrinker.
*/
int
i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
int min_size, unsigned alignment, unsigned cache_level,
unsigned long start, unsigned long end,
i915_gem_evict_something(struct i915_address_space *vm,
u64 min_size, u64 alignment,
unsigned cache_level,
u64 start, u64 end,
unsigned flags)
{
struct list_head eviction_list, unwind_list;
struct i915_vma *vma;
int ret = 0;
int pass = 0;
struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct list_head eviction_list;
struct list_head *phases[] = {
&vm->inactive_list,
&vm->active_list,
NULL,
}, **phase;
struct i915_vma *vma, *next;
int ret;
trace_i915_gem_evict(dev, min_size, alignment, flags);
trace_i915_gem_evict(vm, min_size, alignment, flags);
/*
* The goal is to evict objects and amalgamate space in LRU order.
* The oldest idle objects reside on the inactive list, which is in
* retirement order. The next objects to retire are those on the (per
* ring) active list that do not have an outstanding flush. Once the
* hardware reports completion (the seqno is updated after the
* batchbuffer has been finished) the clean buffer objects would
* be retired to the inactive list. Any dirty objects would be added
* to the tail of the flushing list. So after processing the clean
* active objects we need to emit a MI_FLUSH to retire the flushing
* list, hence the retirement order of the flushing list is in
* advance of the dirty objects on the active lists.
* retirement order. The next objects to retire are those in flight,
* on the active list, again in retirement order.
*
* The retirement sequence is thus:
* 1. Inactive objects (already retired)
* 2. Clean active objects
* 3. Flushing list
* 4. Dirty active objects.
* 2. Active objects (will stall on unbinding)
*
* On each list, the oldest objects lie at the HEAD with the freshest
* object on the TAIL.
*/
INIT_LIST_HEAD(&unwind_list);
if (start != 0 || end != vm->total) {
drm_mm_init_scan_with_range(&vm->mm, min_size,
alignment, cache_level,
......@@ -145,96 +121,84 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
} else
drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
search_again:
/* First see if there is a large enough contiguous idle region... */
list_for_each_entry(vma, &vm->inactive_list, vm_link) {
if (mark_free(vma, &unwind_list))
goto found;
}
if (flags & PIN_NONBLOCK)
goto none;
phases[1] = NULL;
/* Now merge in the soon-to-be-expired objects... */
list_for_each_entry(vma, &vm->active_list, vm_link) {
if (mark_free(vma, &unwind_list))
search_again:
INIT_LIST_HEAD(&eviction_list);
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link)
if (mark_free(vma, &eviction_list))
goto found;
}
} while (*++phase);
none:
/* Nothing found, clean up and bail out! */
while (!list_empty(&unwind_list)) {
vma = list_first_entry(&unwind_list,
struct i915_vma,
exec_list);
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
ret = drm_mm_scan_remove_block(&vma->node);
BUG_ON(ret);
list_del_init(&vma->exec_list);
INIT_LIST_HEAD(&vma->exec_list);
}
/* Can we unpin some objects such as idle hw contents,
* or pending flips?
* or pending flips? But since only the GGTT has global entries
* such as scanouts, rinbuffers and contexts, we can skip the
* purge when inspecting per-process local address spaces.
*/
if (flags & PIN_NONBLOCK)
if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
return -ENOSPC;
/* Only idle the GPU and repeat the search once */
if (pass++ == 0) {
struct drm_i915_private *dev_priv = to_i915(dev);
if (gpu_is_idle(dev_priv)) {
/* If we still have pending pageflip completions, drop
* back to userspace to give our workqueues time to
* acquire our locks and unpin the old scanouts.
*/
return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
}
if (i915_is_ggtt(vm)) {
ret = switch_to_pinned_context(dev_priv);
/* Not everything in the GGTT is tracked via vma (otherwise we
* could evict as required with minimal stalling) so we are forced
* to idle the GPU and explicitly retire outstanding requests in
* the hopes that we can then remove contexts and the like only
* bound by their active reference.
*/
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
}
ret = i915_gem_wait_for_idle(dev_priv);
ret = i915_gem_wait_for_idle(dev_priv, true);
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
goto search_again;
}
/* If we still have pending pageflip completions, drop
* back to userspace to give our workqueues time to
* acquire our locks and unpin the old scanouts.
*/
return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
found:
/* drm_mm doesn't allow any other other operations while
* scanning, therefore store to be evicted objects on a
* temporary list. */
INIT_LIST_HEAD(&eviction_list);
while (!list_empty(&unwind_list)) {
vma = list_first_entry(&unwind_list,
struct i915_vma,
exec_list);
if (drm_mm_scan_remove_block(&vma->node)) {
list_move(&vma->exec_list, &eviction_list);
drm_gem_object_reference(&vma->obj->base);
continue;
}
* scanning, therefore store to-be-evicted objects on a
* temporary list and take a reference for all before
* calling unbind (which may remove the active reference
* of any of our objects, thus corrupting the list).
*/
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
if (drm_mm_scan_remove_block(&vma->node))
__i915_vma_pin(vma);
else
list_del_init(&vma->exec_list);
}
/* Unbinding will emit any required flushes */
while (!list_empty(&eviction_list)) {
struct drm_gem_object *obj;
vma = list_first_entry(&eviction_list,
struct i915_vma,
exec_list);
obj = &vma->obj->base;
list_del_init(&vma->exec_list);
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
drm_gem_object_unreference(obj);
}
return ret;
}
......@@ -256,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target)
vma = container_of(node, typeof(*vma), node);
if (vma->pin_count) {
if (!vma->exec_entry || (vma->pin_count > 1))
if (i915_vma_is_pinned(vma)) {
if (!vma->exec_entry || i915_vma_pin_count(vma) > 1)
/* Object is pinned for some other use */
return -EBUSY;
......@@ -303,22 +267,21 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
struct drm_i915_private *dev_priv = to_i915(vm->dev);
if (i915_is_ggtt(vm)) {
ret = switch_to_pinned_context(dev_priv);
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
}
ret = i915_gem_wait_for_idle(dev_priv);
ret = i915_gem_wait_for_idle(dev_priv, true);
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
WARN_ON(!list_empty(&vm->active_list));
}
list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
if (vma->pin_count == 0)
if (!i915_vma_is_pinned(vma))
WARN_ON(i915_vma_unbind(vma));
return 0;
......
This diff is collapsed.
......@@ -86,20 +86,22 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint64_t val;
/* Adjust fence size to match tiled area */
if (obj->tiling_mode != I915_TILING_NONE) {
uint32_t row_size = obj->stride *
(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
if (tiling != I915_TILING_NONE) {
uint32_t row_size = stride *
(tiling == I915_TILING_Y ? 32 : 8);
size = (size / row_size) * row_size;
}
val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
0xfffff000) << 32;
val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
if (obj->tiling_mode == I915_TILING_Y)
val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
if (tiling == I915_TILING_Y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val |= I965_FENCE_REG_VALID;
......@@ -122,6 +124,8 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
int pitch_val;
int tile_width;
......@@ -131,17 +135,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
tile_width = 128;
else
tile_width = 512;
/* Note: pitch better be a power of two tile widths */
pitch_val = obj->stride / tile_width;
pitch_val = stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (obj->tiling_mode == I915_TILING_Y)
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I915_FENCE_SIZE_BITS(size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
......@@ -161,6 +165,8 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint32_t pitch_val;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
......@@ -169,11 +175,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
"object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
i915_gem_obj_ggtt_offset(obj), size);
pitch_val = obj->stride / 128;
pitch_val = stride / 128;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (obj->tiling_mode == I915_TILING_Y)
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I830_FENCE_SIZE_BITS(size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
......@@ -201,9 +207,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
mb();
WARN(obj && (!obj->stride || !obj->tiling_mode),
WARN(obj &&
(!i915_gem_object_get_stride(obj) ||
!i915_gem_object_get_tiling(obj)),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
obj->stride, obj->tiling_mode);
i915_gem_object_get_stride(obj),
i915_gem_object_get_tiling(obj));
if (IS_GEN2(dev))
i830_write_fence_reg(dev, reg, obj);
......@@ -248,7 +257,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
if (i915_gem_object_is_tiled(obj))
i915_gem_release_mmap(obj);
/* As we do not have an associated fence register, we will force
......@@ -261,15 +270,8 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
if (obj->last_fenced_req) {
int ret = i915_wait_request(obj->last_fenced_req);
if (ret)
return ret;
i915_gem_request_assign(&obj->last_fenced_req, NULL);
}
return 0;
return i915_gem_active_retire(&obj->last_fence,
&obj->base.dev->struct_mutex);
}
/**
......@@ -368,7 +370,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
bool enable = obj->tiling_mode != I915_TILING_NONE;
bool enable = i915_gem_object_is_tiled(obj);
struct drm_i915_fence_reg *reg;
int ret;
......@@ -438,7 +440,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
WARN_ON(!ggtt_vma ||
dev_priv->fence_regs[obj->fence_reg].pin_count >
ggtt_vma->pin_count);
i915_vma_pin_count(ggtt_vma));
dev_priv->fence_regs[obj->fence_reg].pin_count++;
return true;
} else
......@@ -484,7 +486,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
*/
if (reg->obj) {
i915_gem_object_update_fence(reg->obj, reg,
reg->obj->tiling_mode);
i915_gem_object_get_tiling(reg->obj));
} else {
i915_gem_write_fence(dev, i, NULL);
}
......
This diff is collapsed.
......@@ -36,6 +36,8 @@
#include <linux/io-mapping.h>
#include "i915_gem_request.h"
struct drm_i915_file_private;
typedef uint32_t gen6_pte_t;
......@@ -178,12 +180,32 @@ struct i915_vma {
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
void __iomem *iomap;
u64 size;
unsigned int flags;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
* (objects are not allowed multiple times for the same batchbuffer),
* and the framebuffer code. When switching/pageflipping, the
* framebuffer code has at most two buffers pinned per crtc.
*
* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
* bits with absolutely no headroom. So use 4 bits.
*/
#define I915_VMA_PIN_MASK 0xf
#define I915_VMA_PIN_OVERFLOW BIT(5)
/** Flags and address space this VMA is bound to */
#define GLOBAL_BIND (1<<0)
#define LOCAL_BIND (1<<1)
unsigned int bound : 4;
bool is_ggtt : 1;
#define I915_VMA_GLOBAL_BIND BIT(6)
#define I915_VMA_LOCAL_BIND BIT(7)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CLOSED BIT(9)
unsigned int active;
struct i915_gem_active last_read[I915_NUM_ENGINES];
/**
* Support different GGTT views into the same object.
......@@ -208,20 +230,46 @@ struct i915_vma {
struct hlist_node exec_node;
unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
* (objects are not allowed multiple times for the same batchbuffer),
* and the framebuffer code. When switching/pageflipping, the
* framebuffer code has at most two buffers pinned per crtc.
*
* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
* bits with absolutely no headroom. So use 4 bits. */
unsigned int pin_count:4;
#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
};
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
}
static inline bool i915_vma_is_closed(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_CLOSED;
}
static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
{
return vma->active;
}
static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
return i915_vma_get_active(vma);
}
static inline void i915_vma_set_active(struct i915_vma *vma,
unsigned int engine)
{
vma->active |= BIT(engine);
}
static inline void i915_vma_clear_active(struct i915_vma *vma,
unsigned int engine)
{
vma->active &= ~BIT(engine);
}
static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
unsigned int engine)
{
return vma->active & BIT(engine);
}
struct i915_page_dma {
struct page *page;
union {
......@@ -272,11 +320,20 @@ struct i915_pml4 {
struct i915_address_space {
struct drm_mm mm;
struct drm_device *dev;
/* Every address space belongs to a struct file - except for the global
* GTT that is owned by the driver (and so @file is set to NULL). In
* principle, no information should leak from one context to another
* (or between files/processes etc) unless explicitly shared by the
* owner. Tracking the owner is important in order to free up per-file
* objects along with the file, to aide resource tracking, and to
* assign blame.
*/
struct drm_i915_file_private *file;
struct list_head global_link;
u64 start; /* Start offset always 0 for dri2 */
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
bool is_ggtt;
bool closed;
struct i915_page_scratch *scratch_page;
struct i915_page_table *scratch_pt;
......@@ -306,6 +363,13 @@ struct i915_address_space {
*/
struct list_head inactive_list;
/**
* List of vma that have been unbound.
*
* A reference is not held on the buffer while on this list.
*/
struct list_head unbound_list;
/* FIXME: Need a more generic return type */
gen6_pte_t (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
......@@ -338,7 +402,7 @@ struct i915_address_space {
u32 flags);
};
#define i915_is_ggtt(V) ((V)->is_ggtt)
#define i915_is_ggtt(V) (!(V)->file)
/* The Graphics Translation Table is the way in which GEN hardware translates a
* Graphics Virtual Address into a Physical Address. In addition to the normal
......@@ -354,7 +418,6 @@ struct i915_ggtt {
size_t stolen_usable_size; /* Total size minus BIOS reserved */
size_t stolen_reserved_base;
size_t stolen_reserved_size;
size_t size; /* Total size of Global GTT */
u64 mappable_end; /* End offset that we can CPU map */
struct io_mapping *mappable; /* Mapping to our CPU mappable region */
phys_addr_t mappable_base; /* PA of our GMADR */
......@@ -365,8 +428,6 @@ struct i915_ggtt {
bool do_idle_maps;
int mtrr;
int (*probe)(struct i915_ggtt *ggtt);
};
struct i915_hw_ppgtt {
......@@ -380,8 +441,6 @@ struct i915_hw_ppgtt {
struct i915_page_directory pd; /* GEN6-7 */
};
struct drm_i915_file_private *file_priv;
gen6_pte_t __iomem *pd_addr;
int (*enable)(struct i915_hw_ppgtt *ppgtt);
......@@ -521,14 +580,15 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
px_dma(ppgtt->base.scratch_pd);
}
int i915_ggtt_init_hw(struct drm_device *dev);
int i915_ggtt_enable_hw(struct drm_device *dev);
void i915_gem_init_ggtt(struct drm_device *dev);
void i915_ggtt_cleanup_hw(struct drm_device *dev);
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
int i915_gem_init_ggtt(struct drm_i915_private *dev_priv);
void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
int i915_ppgtt_init_hw(struct drm_device *dev);
void i915_ppgtt_release(struct kref *kref);
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *fpriv);
static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
{
......@@ -562,9 +622,66 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
return true;
}
size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
/* Flags used by pin/bind&friends. */
#define PIN_NONBLOCK BIT(0)
#define PIN_MAPPABLE BIT(1)
#define PIN_ZONE_4G BIT(2)
#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */
#define PIN_UPDATE BIT(8)
#define PIN_HIGH BIT(9)
#define PIN_OFFSET_BIAS BIT(10)
#define PIN_OFFSET_FIXED BIT(11)
#define PIN_OFFSET_MASK (~4095)
int __i915_vma_do_pin(struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
static inline int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
/* Pin early to prevent the shrinker/eviction logic from destroying
* our vma as we insert and bind.
*/
if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
return 0;
return __i915_vma_do_pin(vma, size, alignment, flags);
}
static inline int i915_vma_pin_count(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_PIN_MASK;
}
static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
{
return i915_vma_pin_count(vma);
}
static inline void __i915_vma_pin(struct i915_vma *vma)
{
vma->flags++;
GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
}
static inline void __i915_vma_unpin(struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_pinned(vma));
vma->flags--;
}
static inline void i915_vma_unpin(struct i915_vma *vma)
{
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
__i915_vma_unpin(vma);
}
/**
* i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
......@@ -580,6 +697,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
* Returns a valid iomapped pointer or ERR_PTR.
*/
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
/**
* i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
......@@ -593,9 +711,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
{
lockdep_assert_held(&vma->vm->dev->struct_mutex);
GEM_BUG_ON(vma->pin_count == 0);
GEM_BUG_ON(vma->iomap == NULL);
vma->pin_count--;
i915_vma_unpin(vma);
}
#endif
......@@ -28,10 +28,18 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
struct render_state {
const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
u32 aux_batch_size;
u32 aux_batch_offset;
};
static const struct intel_renderstate_rodata *
render_state_get_rodata(const int gen)
render_state_get_rodata(const struct drm_i915_gem_request *req)
{
switch (gen) {
switch (INTEL_GEN(req->i915)) {
case 6:
return &gen6_null_state;
case 7:
......@@ -45,35 +53,6 @@ render_state_get_rodata(const int gen)
return NULL;
}
static int render_state_init(struct render_state *so,
struct drm_i915_private *dev_priv)
{
int ret;
so->gen = INTEL_GEN(dev_priv);
so->rodata = render_state_get_rodata(so->gen);
if (so->rodata == NULL)
return 0;
if (so->rodata->batch_items * 4 > 4096)
return -EINVAL;
so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
if (IS_ERR(so->obj))
return PTR_ERR(so->obj);
ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
if (ret)
goto free_gem;
so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
return 0;
free_gem:
drm_gem_object_unreference(&so->obj->base);
return ret;
}
/*
* Macro to add commands to auxiliary batch.
* This macro only checks for page overflow before inserting the commands,
......@@ -96,6 +75,7 @@ static int render_state_setup(struct render_state *so)
{
struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
unsigned int i = 0, reloc_index = 0;
struct page *page;
u32 *d;
......@@ -114,7 +94,7 @@ static int render_state_setup(struct render_state *so)
if (i * 4 == rodata->reloc[reloc_index]) {
u64 r = s + so->ggtt_offset;
s = lower_32_bits(r);
if (so->gen >= 8) {
if (has_64bit_reloc) {
if (i + 1 >= rodata->batch_items ||
rodata->batch[i + 1] != 0) {
ret = -EINVAL;
......@@ -192,67 +172,55 @@ static int render_state_setup(struct render_state *so)
#undef OUT_BATCH
void i915_gem_render_state_fini(struct render_state *so)
{
i915_gem_object_ggtt_unpin(so->obj);
drm_gem_object_unreference(&so->obj->base);
}
int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
struct render_state *so)
int i915_gem_render_state_init(struct drm_i915_gem_request *req)
{
struct render_state so;
int ret;
if (WARN_ON(engine->id != RCS))
if (WARN_ON(req->engine->id != RCS))
return -ENOENT;
ret = render_state_init(so, engine->i915);
if (ret)
return ret;
if (so->rodata == NULL)
so.rodata = render_state_get_rodata(req);
if (!so.rodata)
return 0;
ret = render_state_setup(so);
if (ret) {
i915_gem_render_state_fini(so);
return ret;
}
if (so.rodata->batch_items * 4 > 4096)
return -EINVAL;
return 0;
}
so.obj = i915_gem_object_create(&req->i915->drm, 4096);
if (IS_ERR(so.obj))
return PTR_ERR(so.obj);
int i915_gem_render_state_init(struct drm_i915_gem_request *req)
{
struct render_state so;
int ret;
ret = i915_gem_render_state_prepare(req->engine, &so);
ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
if (ret)
return ret;
goto err_obj;
if (so.rodata == NULL)
return 0;
so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj);
ret = render_state_setup(&so);
if (ret)
goto err_unpin;
ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
so.rodata->batch_items * 4,
I915_DISPATCH_SECURE);
if (ret)
goto out;
goto err_unpin;
if (so.aux_batch_size > 8) {
ret = req->engine->dispatch_execbuffer(req,
ret = req->engine->emit_bb_start(req,
(so.ggtt_offset +
so.aux_batch_offset),
so.aux_batch_size,
I915_DISPATCH_SECURE);
if (ret)
goto out;
goto err_unpin;
}
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
out:
i915_gem_render_state_fini(&so);
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
err_unpin:
i915_gem_object_ggtt_unpin(so.obj);
err_obj:
i915_gem_object_put(so.obj);
return ret;
}
......@@ -26,24 +26,6 @@
#include <linux/types.h>
struct intel_renderstate_rodata {
const u32 *reloc;
const u32 *batch;
const u32 batch_items;
};
struct render_state {
const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
int gen;
u32 aux_batch_size;
u32 aux_batch_offset;
};
int i915_gem_render_state_init(struct drm_i915_gem_request *req);
void i915_gem_render_state_fini(struct render_state *so);
int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
struct render_state *so);
#endif /* _I915_GEM_RENDER_STATE_H_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -173,6 +173,7 @@ static const struct intel_device_info intel_pineview_info = {
.gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2,
.need_gfx_hws = 1, .has_hotplug = 1,
.has_overlay = 1,
.ring_mask = RENDER_RING,
GEN_DEFAULT_PIPEOFFSETS,
CURSOR_OFFSETS,
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev);
int i915_guc_submission_init(struct drm_i915_private *dev_priv);
int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
int i915_guc_submit(struct drm_i915_gem_request *rq);
void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment