Merge branch 'etnaviv/next' of https://git.pengutronix.de/git/lst/linux into drm-next

This time we've added support for reporting of GPU load via the common fdinfo format, as already supported by multiple other drivers. Improved diagnostic messages for MMU faults. And finally added experimental support for driving the VeriSilicon NPU cores, which are very close relatives to the GPU designs, so close in fact that they can run the same compute instruction set, but with a big NN-fabric/matrix/tensor execution array glued to the side. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Lucas Stach <l.stach@pengutronix.de> Link: https://patchwork.freedesktop.org/patch/msgid/80ceb4eedf7d88e434deeb69607d5ce0a0759581.camel@pengutronix.de

Merge branch 'etnaviv/next' of https://git.pengutronix.de/git/lst/linux into drm-next
This time we've added support for reporting of GPU load via the common fdinfo format, as already supported by multiple other drivers. Improved diagnostic messages for MMU faults. And finally added experimental support for driving the VeriSilicon NPU cores, which are very close relatives to the GPU designs, so close in fact that they can run the same compute instruction set, but with a big NN-fabric/matrix/tensor execution array glued to the side. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Lucas Stach <l.stach@pengutronix.de> Link: https://patchwork.freedesktop.org/patch/msgid/80ceb4eedf7d88e434deeb69607d5ce0a0759581.camel@pengutronix.de
48075a66 · Dave Airlie · 78e98001 · 4c22c61e · 48075a66 · 48075a66
Commit 48075a66 authored Feb 09, 2023 by Dave Airlie
10 changed files
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -22,6 +22,7 @@
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_perfmon.h"
+#include "common.xml.h"

 /*
 * DRM operations:
@@ -56,6 +57,11 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 	if (!ctx)
 		return -ENOMEM;

+	ret = xa_alloc_cyclic(&priv->active_contexts, &ctx->id, ctx,
+			      xa_limit_32b, &priv->next_context_id, GFP_KERNEL);
+	if (ret < 0)
+		goto out_free;
+
 	ctx->mmu = etnaviv_iommu_context_init(priv->mmu_global,
 					      priv->cmdbuf_suballoc);
 	if (!ctx->mmu) {
@@ -99,6 +105,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)

 	etnaviv_iommu_context_put(ctx->mmu);

+	xa_erase(&priv->active_contexts, ctx->id);
+
 	kfree(ctx);
 }

@@ -468,7 +476,47 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = {
 	ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW),
 };

-DEFINE_DRM_GEM_FOPS(fops);
+static void etnaviv_fop_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct drm_file *file = f->private_data;
+	struct drm_device *dev = file->minor->dev;
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct etnaviv_file_private *ctx = file->driver_priv;
+
+	/*
+	 * For a description of the text output format used here, see
+	 * Documentation/gpu/drm-usage-stats.rst.
+	 */
+	seq_printf(m, "drm-driver:\t%s\n", dev->driver->name);
+	seq_printf(m, "drm-client-id:\t%u\n", ctx->id);
+
+	for (int i = 0; i < ETNA_MAX_PIPES; i++) {
+		struct etnaviv_gpu *gpu = priv->gpu[i];
+		char engine[10] = "UNK";
+		int cur = 0;
+
+		if (!gpu)
+			continue;
+
+		if (gpu->identity.features & chipFeatures_PIPE_2D)
+			cur = snprintf(engine, sizeof(engine), "2D");
+		if (gpu->identity.features & chipFeatures_PIPE_3D)
+			cur = snprintf(engine + cur, sizeof(engine) - cur,
+				       "%s3D", cur ? "/" : "");
+		if (gpu->identity.nn_core_count > 0)
+			cur = snprintf(engine + cur, sizeof(engine) - cur,
+				       "%sNN", cur ? "/" : "");
+
+		seq_printf(m, "drm-engine-%s:\t%llu ns\n", engine,
+			   ctx->sched_entity[i].elapsed_ns);
+	}
+}
+
+static const struct file_operations fops = {
+	.owner = THIS_MODULE,
+	DRM_GEM_FOPS,
+	.show_fdinfo = etnaviv_fop_show_fdinfo,
+};

 static const struct drm_driver etnaviv_drm_driver = {
 	.driver_features    = DRIVER_GEM | DRIVER_RENDER,
@@ -514,6 +562,8 @@ static int etnaviv_bind(struct device *dev)

 	dma_set_max_seg_size(dev, SZ_2G);

+	xa_init_flags(&priv->active_contexts, XA_FLAGS_ALLOC);
+
 	mutex_init(&priv->gem_lock);
 	INIT_LIST_HEAD(&priv->gem_list);
 	priv->num_gpus = 0;
@@ -563,6 +613,8 @@ static void etnaviv_unbind(struct device *dev)

 	etnaviv_cmdbuf_suballoc_destroy(priv->cmdbuf_suballoc);

+	xa_destroy(&priv->active_contexts);
+
 	drm->dev_private = NULL;
 	kfree(priv);


--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -12,6 +12,7 @@
 #include <linux/sizes.h>
 #include <linux/time64.h>
 #include <linux/types.h>
+#include <linux/xarray.h>

 #include <drm/drm_drv.h>
 #include <drm/drm_gem.h>
@@ -28,6 +29,7 @@ struct etnaviv_iommu_global;
 #define ETNAVIV_SOFTPIN_START_ADDRESS	SZ_4M /* must be >= SUBALLOC_SIZE */

 struct etnaviv_file_private {
+	int id;
 	struct etnaviv_iommu_context	*mmu;
 	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
 };
@@ -40,6 +42,9 @@ struct etnaviv_drm_private {
 	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
 	struct etnaviv_iommu_global *mmu_global;

+	struct xarray active_contexts;
+	u32 next_context_id;
+
 	/* list of GEM objects: */
 	struct mutex gem_lock;
 	struct list_head gem_list;

--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -393,10 +393,11 @@ static void submit_cleanup(struct kref *kref)
 	wake_up_all(&submit->gpu->fence_event);

 	if (submit->out_fence) {
-		/* first remove from IDR, so fence can not be found anymore */
-		mutex_lock(&submit->gpu->fence_lock);
-		idr_remove(&submit->gpu->fence_idr, submit->out_fence_id);
-		mutex_unlock(&submit->gpu->fence_lock);
+		/*
+		 * Remove from user fence array before dropping the reference,
+		 * so fence can not be found in lookup anymore.
+		 */
+		xa_erase(&submit->gpu->user_fences, submit->out_fence_id);
 		dma_fence_put(submit->out_fence);
 	}


--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -773,6 +773,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}

+	if (gpu->identity.nn_core_count > 0)
+		dev_warn(gpu->dev, "etnaviv has been instantiated on a NPU, "
+                                   "for which the UAPI is still experimental\n");
+
 	/* Exclude VG cores with FE2.0 */
 	if (gpu->identity.features & chipFeatures_PIPE_VG &&
 	    gpu->identity.features & chipFeatures_FE20) {
@@ -957,6 +961,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 			gpu->identity.vertex_cache_size);
 	seq_printf(m, "\t shader_core_count: %d\n",
 			gpu->identity.shader_core_count);
+	seq_printf(m, "\t nn_core_count: %d\n",
+			gpu->identity.nn_core_count);
 	seq_printf(m, "\t pixel_pipes: %d\n",
 			gpu->identity.pixel_pipes);
 	seq_printf(m, "\t vertex_output_buffer_size: %d\n",
@@ -1240,7 +1246,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	 * pretends we didn't find a fence in that case.
 	 */
 	rcu_read_lock();
-	fence = idr_find(&gpu->fence_idr, id);
+	fence = xa_load(&gpu->user_fences, id);
 	if (fence)
 		fence = dma_fence_get_rcu(fence);
 	rcu_read_unlock();
@@ -1450,6 +1456,15 @@ static void sync_point_worker(struct work_struct *work)

 static void dump_mmu_fault(struct etnaviv_gpu *gpu)
 {
+	static const char *fault_reasons[] = {
+		"slave not present",
+		"page not present",
+		"write violation",
+		"out of bounds",
+		"read security violation",
+		"write security violation",
+	};
+
 	u32 status_reg, status;
 	int i;

@@ -1462,18 +1477,25 @@ static void dump_mmu_fault(struct etnaviv_gpu *gpu)
 	dev_err_ratelimited(gpu->dev, "MMU fault status 0x%08x\n", status);

 	for (i = 0; i < 4; i++) {
+		const char *reason = "unknown";
 		u32 address_reg;
+		u32 mmu_status;

-		if (!(status & (VIVS_MMUv2_STATUS_EXCEPTION0__MASK << (i * 4))))
+		mmu_status = (status >> (i * 4)) & VIVS_MMUv2_STATUS_EXCEPTION0__MASK;
+		if (!mmu_status)
 			continue;

+		if ((mmu_status - 1) < ARRAY_SIZE(fault_reasons))
+			reason = fault_reasons[mmu_status - 1];
+
 		if (gpu->sec_mode == ETNA_SEC_NONE)
 			address_reg = VIVS_MMUv2_EXCEPTION_ADDR(i);
 		else
 			address_reg = VIVS_MMUv2_SEC_EXCEPTION_ADDR;

-		dev_err_ratelimited(gpu->dev, "MMU %d fault addr 0x%08x\n", i,
-				    gpu_read(gpu, address_reg));
+		dev_err_ratelimited(gpu->dev,
+				    "MMU %d fault (%s) addr 0x%08x\n",
+				    i, reason, gpu_read(gpu, address_reg));
 	}
 }

@@ -1629,7 +1651,6 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 	return etnaviv_gpu_clk_disable(gpu);
 }

-#ifdef CONFIG_PM
 static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 {
 	int ret;
@@ -1645,7 +1666,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)

 	return 0;
 }
-#endif

 static int
 etnaviv_gpu_cooling_get_max_state(struct thermal_cooling_device *cdev,
@@ -1713,18 +1733,17 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	if (ret)
 		goto out_workqueue;

-#ifdef CONFIG_PM
+	if (IS_ENABLED(CONFIG_PM))
 		ret = pm_runtime_get_sync(gpu->dev);
-#else
+	else
 		ret = etnaviv_gpu_clk_enable(gpu);
-#endif
 	if (ret < 0)
 		goto out_sched;


 	gpu->drm = drm;
 	gpu->fence_context = dma_fence_context_alloc(1);
-	idr_init(&gpu->fence_idr);
+	xa_init_flags(&gpu->user_fences, XA_FLAGS_ALLOC);
 	spin_lock_init(&gpu->fence_spinlock);

 	INIT_WORK(&gpu->sync_point_work, sync_point_worker);
@@ -1761,12 +1780,12 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,

 	etnaviv_sched_fini(gpu);

-#ifdef CONFIG_PM
+	if (IS_ENABLED(CONFIG_PM)) {
 		pm_runtime_get_sync(gpu->dev);
 		pm_runtime_put_sync_suspend(gpu->dev);
-#else
+	} else {
 		etnaviv_gpu_hw_suspend(gpu);
-#endif
+	}

 	if (gpu->mmu_context)
 		etnaviv_iommu_context_put(gpu->mmu_context);
@@ -1778,7 +1797,7 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	}

 	gpu->drm = NULL;
-	idr_destroy(&gpu->fence_idr);
+	xa_destroy(&gpu->user_fences);

 	if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
 		thermal_cooling_device_unregister(gpu->cooling);
@@ -1810,7 +1829,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)

 	gpu->dev = &pdev->dev;
 	mutex_init(&gpu->lock);
-	mutex_init(&gpu->fence_lock);
+	mutex_init(&gpu->sched_lock);

 	/* Map registers: */
 	gpu->mmio = devm_platform_ioremap_resource(pdev, 0);
@@ -1880,7 +1899,6 @@ static int etnaviv_gpu_platform_remove(struct platform_device *pdev)
 	return 0;
 }

-#ifdef CONFIG_PM
 static int etnaviv_gpu_rpm_suspend(struct device *dev)
 {
 	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
@@ -1923,18 +1941,16 @@ static int etnaviv_gpu_rpm_resume(struct device *dev)

 	return 0;
 }
-#endif

 static const struct dev_pm_ops etnaviv_gpu_pm_ops = {
-	SET_RUNTIME_PM_OPS(etnaviv_gpu_rpm_suspend, etnaviv_gpu_rpm_resume,
-			   NULL)
+	RUNTIME_PM_OPS(etnaviv_gpu_rpm_suspend, etnaviv_gpu_rpm_resume, NULL)
 };

 struct platform_driver etnaviv_gpu_driver = {
 	.driver = {
 		.name = "etnaviv-gpu",
 		.owner = THIS_MODULE,
-		.pm = &etnaviv_gpu_pm_ops,
+		.pm = pm_ptr(&etnaviv_gpu_pm_ops),
 		.of_match_table = etnaviv_gpu_match,
 	},
 	.probe = etnaviv_gpu_platform_probe,

--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -51,6 +51,9 @@ struct etnaviv_chip_identity {
 	/* Number of shader cores. */
 	u32 shader_core_count;

+	/* Number of Neural Network cores. */
+	u32 nn_core_count;
+
 	/* Size of the vertex cache. */
 	u32 vertex_cache_size;

@@ -100,6 +103,7 @@ struct etnaviv_gpu {
 	struct etnaviv_chip_identity identity;
 	enum etnaviv_sec_mode sec_mode;
 	struct workqueue_struct *wq;
+	struct mutex sched_lock;
 	struct drm_gpu_scheduler sched;
 	bool initialized;
 	bool fe_running;
@@ -117,8 +121,8 @@ struct etnaviv_gpu {
 	u32 idle_mask;

 	/* Fencing support */
-	struct mutex fence_lock;
-	struct idr fence_idr;
+	struct xarray user_fences;
+	u32 next_user_fence;
 	u32 next_fence;
 	u32 completed_fence;
 	wait_queue_head_t fence_event;

--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -16,6 +16,7 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 		.register_max = 64,
 		.thread_count = 128,
 		.shader_core_count = 1,
+		.nn_core_count = 0,
 		.vertex_cache_size = 8,
 		.vertex_output_buffer_size = 1024,
 		.pixel_pipes = 1,
@@ -47,6 +48,7 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 		.register_max = 64,
 		.thread_count = 512,
 		.shader_core_count = 2,
+		.nn_core_count = 0,
 		.vertex_cache_size = 16,
 		.vertex_output_buffer_size = 1024,
 		.pixel_pipes = 1,
@@ -78,6 +80,7 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 		.register_max = 64,
 		.thread_count = 512,
 		.shader_core_count = 2,
+		.nn_core_count = 0,
 		.vertex_cache_size = 16,
 		.vertex_output_buffer_size = 1024,
 		.pixel_pipes = 1,
@@ -140,6 +143,7 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 		.register_max = 64,
 		.thread_count = 1024,
 		.shader_core_count = 4,
+		.nn_core_count = 0,
 		.vertex_cache_size = 16,
 		.vertex_output_buffer_size = 1024,
 		.pixel_pipes = 2,
@@ -161,6 +165,38 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 		.minor_features10 = 0x90044250,
 		.minor_features11 = 0x00000024,
 	},
+	{
+		.model = 0x8000,
+		.revision = 0x7120,
+		.product_id = 0x45080009,
+		.customer_id = 0x88,
+		.eco_id = 0,
+		.stream_count = 8,
+		.register_max = 64,
+		.thread_count = 256,
+		.shader_core_count = 1,
+		.nn_core_count = 8,
+		.vertex_cache_size = 16,
+		.vertex_output_buffer_size = 1024,
+		.pixel_pipes = 1,
+		.instruction_count = 512,
+		.num_constants = 320,
+		.buffer_size = 0,
+		.varyings_count = 16,
+		.features = 0xe0287cac,
+		.minor_features0 = 0xc1799eff,
+		.minor_features1 = 0xfefbfadb,
+		.minor_features2 = 0xeb9d6fbf,
+		.minor_features3 = 0xedfffced,
+		.minor_features4 = 0xd30dafc7,
+		.minor_features5 = 0x7b5ac333,
+		.minor_features6 = 0xfc8ee200,
+		.minor_features7 = 0x03fffa6f,
+		.minor_features8 = 0x00fe0ef0,
+		.minor_features9 = 0x0088003c,
+		.minor_features10 = 0x108048c0,
+		.minor_features11 = 0x00000010,
+	},
 };

 bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu)

--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -97,24 +97,24 @@ static const struct drm_sched_backend_ops etnaviv_sched_ops = {

 int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit)
 {
-	int ret = 0;
+	struct etnaviv_gpu *gpu = submit->gpu;
+	int ret;

 	/*
-	 * Hold the fence lock across the whole operation to avoid jobs being
+	 * Hold the sched lock across the whole operation to avoid jobs being
 	 * pushed out of order with regard to their sched fence seqnos as
 	 * allocated in drm_sched_job_arm.
 	 */
-	mutex_lock(&submit->gpu->fence_lock);
+	mutex_lock(&gpu->sched_lock);

 	drm_sched_job_arm(&submit->sched_job);

 	submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished);
-	submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr,
-						submit->out_fence, 0,
-						INT_MAX, GFP_KERNEL);
-	if (submit->out_fence_id < 0) {
+	ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id,
+			      submit->out_fence, xa_limit_32b,
+			      &gpu->next_user_fence, GFP_KERNEL);
+	if (ret < 0) {
 		drm_sched_job_cleanup(&submit->sched_job);
-		ret = -ENOMEM;
 		goto out_unlock;
 	}

@@ -124,7 +124,7 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit)
 	drm_sched_entity_push_job(&submit->sched_job);

 out_unlock:
-	mutex_unlock(&submit->gpu->fence_lock);
+	mutex_unlock(&gpu->sched_lock);

 	return ret;
 }

--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,17 +8,17 @@ This file was generated by the rules-ng-ng headergen tool in this git repository
 git clone git://0x04.net/rules-ng-ng

 The rules-ng-ng source files this header was generated from are:
- state.xml     (  26666 bytes, from 2019-12-20 21:20:35)
- common.xml    (  35468 bytes, from 2018-02-10 13:09:26)
- common_3d.xml (  15058 bytes, from 2019-12-28 20:02:03)
- state_hi.xml  (  30552 bytes, from 2019-12-28 20:02:48)
- copyright.xml (   1597 bytes, from 2018-02-10 13:09:26)
- state_2d.xml  (  51552 bytes, from 2018-02-10 13:09:26)
- state_3d.xml  (  83098 bytes, from 2019-12-28 20:02:03)
- state_blt.xml (  14252 bytes, from 2019-10-20 19:59:15)
- state_vg.xml  (   5975 bytes, from 2018-02-10 13:09:26)
-
-Copyright (C) 2012-2019 by the following authors:
+- state.xml     (  27198 bytes, from 2022-04-22 10:35:24)
+- common.xml    (  35468 bytes, from 2020-10-28 12:56:03)
+- common_3d.xml (  15058 bytes, from 2020-10-28 12:56:03)
+- state_hi.xml  (  34804 bytes, from 2022-12-02 09:06:28)
+- copyright.xml (   1597 bytes, from 2020-10-28 12:56:03)
+- state_2d.xml  (  51552 bytes, from 2020-10-28 12:56:03)
+- state_3d.xml  (  84445 bytes, from 2022-11-15 15:59:38)
+- state_blt.xml (  14424 bytes, from 2022-11-07 11:18:41)
+- state_vg.xml  (   5975 bytes, from 2020-10-28 12:56:03)
+
+Copyright (C) 2012-2022 by the following authors:
 - Wladimir J. van der Laan <laanwj@gmail.com>
 - Christian Gmeiner <christian.gmeiner@gmail.com>
 - Lucas Stach <l.stach@pengutronix.de>
@@ -321,16 +321,16 @@ DEALINGS IN THE SOFTWARE.
 #define VIVS_MMUv2_CONFIGURATION_ADDRESS(x)			(((x) << VIVS_MMUv2_CONFIGURATION_ADDRESS__SHIFT) & VIVS_MMUv2_CONFIGURATION_ADDRESS__MASK)

 #define VIVS_MMUv2_STATUS					0x00000188
-#define VIVS_MMUv2_STATUS_EXCEPTION0__MASK			0x00000003
+#define VIVS_MMUv2_STATUS_EXCEPTION0__MASK			0x0000000f
 #define VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT			0
 #define VIVS_MMUv2_STATUS_EXCEPTION0(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION0__MASK)
-#define VIVS_MMUv2_STATUS_EXCEPTION1__MASK			0x00000030
+#define VIVS_MMUv2_STATUS_EXCEPTION1__MASK			0x000000f0
 #define VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT			4
 #define VIVS_MMUv2_STATUS_EXCEPTION1(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION1__MASK)
-#define VIVS_MMUv2_STATUS_EXCEPTION2__MASK			0x00000300
+#define VIVS_MMUv2_STATUS_EXCEPTION2__MASK			0x00000f00
 #define VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT			8
 #define VIVS_MMUv2_STATUS_EXCEPTION2(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION2__MASK)
-#define VIVS_MMUv2_STATUS_EXCEPTION3__MASK			0x00003000
+#define VIVS_MMUv2_STATUS_EXCEPTION3__MASK			0x0000f000
 #define VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT			12
 #define VIVS_MMUv2_STATUS_EXCEPTION3(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION3__MASK)

@@ -465,7 +465,13 @@ DEALINGS IN THE SOFTWARE.
 #define VIVS_MC_PROFILE_CONFIG0					0x00000470
 #define VIVS_MC_PROFILE_CONFIG0_FE__MASK			0x000000ff
 #define VIVS_MC_PROFILE_CONFIG0_FE__SHIFT			0
+#define VIVS_MC_PROFILE_CONFIG0_FE_DRAW_COUNT			0x0000000a
+#define VIVS_MC_PROFILE_CONFIG0_FE_OUT_VERTEX_COUNT		0x0000000b
+#define VIVS_MC_PROFILE_CONFIG0_FE_CACHE_MISS_COUNT		0x0000000c
 #define VIVS_MC_PROFILE_CONFIG0_FE_RESET			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG0_FE_CACHE_LK_COUNT		0x00000010
+#define VIVS_MC_PROFILE_CONFIG0_FE_STALL_COUNT			0x00000011
+#define VIVS_MC_PROFILE_CONFIG0_FE_PROCESS_COUNT		0x00000012
 #define VIVS_MC_PROFILE_CONFIG0_DE__MASK			0x0000ff00
 #define VIVS_MC_PROFILE_CONFIG0_DE__SHIFT			8
 #define VIVS_MC_PROFILE_CONFIG0_DE_RESET			0x00000f00
@@ -499,11 +505,14 @@ DEALINGS IN THE SOFTWARE.
 #define VIVS_MC_PROFILE_CONFIG1_PA_DEPTH_CLIPPED_COUNTER	0x00000006
 #define VIVS_MC_PROFILE_CONFIG1_PA_TRIVIAL_REJECTED_COUNTER	0x00000007
 #define VIVS_MC_PROFILE_CONFIG1_PA_CULLED_COUNTER		0x00000008
+#define VIVS_MC_PROFILE_CONFIG1_PA_DROPED_PRIM_COUNTER		0x00000009
+#define VIVS_MC_PROFILE_CONFIG1_PA_FRUSTUM_CLIPPED_PRIM_COUNTER	0x0000000a
 #define VIVS_MC_PROFILE_CONFIG1_PA_RESET			0x0000000f
 #define VIVS_MC_PROFILE_CONFIG1_SE__MASK			0x0000ff00
 #define VIVS_MC_PROFILE_CONFIG1_SE__SHIFT			8
 #define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_TRIANGLE_COUNT	0x00000000
 #define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_LINES_COUNT		0x00000100
+#define VIVS_MC_PROFILE_CONFIG1_SE_TRIVIAL_REJECTED_LINE_COUNT	0x00000400
 #define VIVS_MC_PROFILE_CONFIG1_SE_RESET			0x00000f00
 #define VIVS_MC_PROFILE_CONFIG1_RA__MASK			0x00ff0000
 #define VIVS_MC_PROFILE_CONFIG1_RA__SHIFT			16
@@ -515,6 +524,8 @@ DEALINGS IN THE SOFTWARE.
 #define VIVS_MC_PROFILE_CONFIG1_RA_PREFETCH_CACHE_MISS_COUNTER	0x000a0000
 #define VIVS_MC_PROFILE_CONFIG1_RA_CULLED_QUAD_COUNT		0x000b0000
 #define VIVS_MC_PROFILE_CONFIG1_RA_RESET			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG1_RA_PIPE_HZ_CACHE_MISS_COUNTER	0x00110000
+#define VIVS_MC_PROFILE_CONFIG1_RA_PREFETCH_HZ_CACHE_MISS_COUNTER	0x00120000
 #define VIVS_MC_PROFILE_CONFIG1_TX__MASK			0xff000000
 #define VIVS_MC_PROFILE_CONFIG1_TX__SHIFT			24
 #define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_BILINEAR_REQUESTS	0x00000000
@@ -535,13 +546,48 @@ DEALINGS IN THE SOFTWARE.
 #define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_PIPELINE	0x00000001
 #define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_IP	0x00000002
 #define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_PIPELINE	0x00000003
-#define VIVS_MC_PROFILE_CONFIG2_MC_RESET			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_SENTOUT_FROM_COLORPIPE	0x00000004
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_FROM_COLORPIPE	0x00000005
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_DEPTHPIPE	0x00000007
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_SENTOUT_FROM_DEPTHPIPE	0x00000008
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_DEPTHPIPE	0x00000009
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_SENTOUT_FROM_DEPTHPIPE	0x0000000a
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_FROM_DEPTHPIPE	0x0000000b
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_OTHERS	0x0000000c
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_OTHERS	0x0000000d
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_FROM_OTHERS	0x0000000e
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_FROM_OTHERS	0x0000000f
+#define VIVS_MC_PROFILE_CONFIG2_MC_FE_READ_BANDWIDTH		0x00000015
+#define VIVS_MC_PROFILE_CONFIG2_MC_MMU_READ_BANDWIDTH		0x00000016
+#define VIVS_MC_PROFILE_CONFIG2_MC_BLT_READ_BANDWIDTH		0x00000017
+#define VIVS_MC_PROFILE_CONFIG2_MC_SH0_READ_BANDWIDTH		0x00000018
+#define VIVS_MC_PROFILE_CONFIG2_MC_SH1_READ_BANDWIDTH		0x00000019
+#define VIVS_MC_PROFILE_CONFIG2_MC_PE_WRITE_BANDWIDTH		0x0000001a
+#define VIVS_MC_PROFILE_CONFIG2_MC_BLT_WRITE_BANDWIDTH		0x0000001b
+#define VIVS_MC_PROFILE_CONFIG2_MC_SH0_WRITE_BANDWIDTH		0x0000001c
+#define VIVS_MC_PROFILE_CONFIG2_MC_SH1_WRITE_BANDWIDTH		0x0000001d
 #define VIVS_MC_PROFILE_CONFIG2_HI__MASK			0x0000ff00
 #define VIVS_MC_PROFILE_CONFIG2_HI__SHIFT			8
 #define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_READ_REQUEST_STALLED	0x00000000
 #define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_REQUEST_STALLED	0x00000100
 #define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_DATA_STALLED	0x00000200
 #define VIVS_MC_PROFILE_CONFIG2_HI_RESET			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG2_L2__MASK			0x00ff0000
+#define VIVS_MC_PROFILE_CONFIG2_L2__SHIFT			16
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_AXI0_READ_REQUEST_COUNT	0x00000000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_AXI0_WRITE_REQUEST_COUNT	0x00040000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_AXI1_WRITE_REQUEST_COUNT	0x00050000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_READ_TRANSACTIONS_REQUEST_BY_AXI0	0x00080000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_READ_TRANSACTIONS_REQUEST_BY_AXI1	0x00090000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_WRITE_TRANSACTIONS_REQUEST_BY_AXI0	0x000c0000
+#define VIVS_MC_PROFILE_CONFIG2_L2_TOTAL_WRITE_TRANSACTIONS_REQUEST_BY_AXI1	0x000d0000
+#define VIVS_MC_PROFILE_CONFIG2_L2_RESET			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI0_MINMAX_LATENCY		0x00100000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI0_TOTAL_LATENCY		0x00110000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI0_TOTAL_REQUEST_COUNT	0x00120000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI1_MINMAX_LATENCY		0x00130000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI1_TOTAL_LATENCY		0x00140000
+#define VIVS_MC_PROFILE_CONFIG2_L2_AXI1_TOTAL_REQUEST_COUNT	0x00150000
 #define VIVS_MC_PROFILE_CONFIG2_BLT__MASK			0xff000000
 #define VIVS_MC_PROFILE_CONFIG2_BLT__SHIFT			24
 #define VIVS_MC_PROFILE_CONFIG2_BLT_UNK0			0x00000000
@@ -566,5 +612,13 @@ DEALINGS IN THE SOFTWARE.

 #define VIVS_MC_PROFILE_L2_READ					0x00000564

+#define VIVS_MC_MC_LATENCY_RESET				0x00000568
+
+#define VIVS_MC_MC_AXI_MAX_MIN_LATENCY				0x0000056c
+
+#define VIVS_MC_MC_AXI_TOTAL_LATENCY				0x00000570
+
+#define VIVS_MC_MC_AXI_SAMPLE_COUNT				0x00000574
+

 #endif /* STATE_HI_XML */
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -906,6 +906,12 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)

 	spin_unlock(&sched->job_list_lock);

+	if (job) {
+		job->entity->elapsed_ns += ktime_to_ns(
+			ktime_sub(job->s_fence->finished.timestamp,
+				  job->s_fence->scheduled.timestamp));
+	}
+
 	return job;
 }


--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -228,6 +228,13 @@ struct drm_sched_entity {
 	 */
 	struct rb_node			rb_tree_node;

+	/**
+	 * @elapsed_ns:
+	 *
+	 * Records the amount of time where jobs from this entity were active
+	 * on the GPU.
+	 */
+	uint64_t elapsed_ns;
 };

 /**