Commit 70b5f09e authored by Dave Airlie

Merge tag 'drm-msm-next-2019-04-21' of https://gitlab.freedesktop.org/drm/msm into drm-next

This time around it is a bunch of cleanups and fixes, expanding GPU
"zap" shader support (so we can take the GPU out of secure mode on
boot) to a6xx, and a small UABI extension to support robustness (see
mesa MR 673).
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGsHwsEfi4y2LYKSqeqDEYvffwVgKhiP8jHcHpxp13J5LQ@mail.gmail.com
parents 42f1a013 b02872df
......@@ -24,7 +24,10 @@ Required properties:
* "cxo"
* "axi"
* "mnoc"
- power-domains: should be <&clock_gpucc GPU_CX_GDSC>
- power-domains: should be:
<&clock_gpucc GPU_CX_GDSC>
<&clock_gpucc GPU_GX_GDSC>
- power-domain-names: Matching names for the power domains
- iommus: phandle to the adreno iommu
- operating-points-v2: phandle to the OPP operating points
......@@ -51,7 +54,10 @@ Example:
<&gcc GCC_GPU_MEMNOC_GFX_CLK>;
clock-names = "gmu", "cxo", "axi", "memnoc";
power-domains = <&gpucc GPU_CX_GDSC>;
power-domains = <&gpucc GPU_CX_GDSC>,
<&gpucc GPU_GX_GDSC>;
power-domain-names = "cx", "gx";
iommus = <&adreno_smmu 5>;
operating-points-v2 = <&gmu_opp_table>;
......
......@@ -22,9 +22,14 @@ Required properties:
- qcom,adreno-630.2
- iommus: optional phandle to an adreno iommu instance
- operating-points-v2: optional phandle to the OPP operating points
- interconnects: optional phandle to an interconnect provider. See
../interconnect/interconnect.txt for details.
- qcom,gmu: For GMU attached devices a phandle to the GMU device that will
control the power for the GPU. Applicable targets:
- qcom,adreno-630.2
- zap-shader: For a5xx and a6xx devices this node contains a memory-region that
points to reserved memory to store the zap shader that can be used to help
bring the GPU out of secure mode.
Example 3xx/4xx/a5xx:
......@@ -70,6 +75,12 @@ Example a6xx (with GMU):
operating-points-v2 = <&gpu_opp_table>;
interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>;
qcom,gmu = <&gmu>;
zap-shader {
memory-region = <&zap_shader_region>;
};
};
};
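
For completeness, the zap-shader example above assumes a reserved-memory
carveout along these lines (a sketch only; the label matches the example,
but the address and size here are hypothetical and platform specific):

	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		zap_shader_region: memory@8f200000 {
			no-map;
			reg = <0x0 0x8f200000 0x0 0x200000>;
		};
	};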
......@@ -21,6 +21,11 @@ config DRM_MSM
help
DRM/KMS driver for MSM/snapdragon.
config DRM_MSM_GPU_STATE
bool
depends on DRM_MSM && (DEBUG_FS || DEV_COREDUMP)
default y
config DRM_MSM_REGISTER_LOGGING
bool "MSM DRM register logging"
depends on DRM_MSM
......
......@@ -15,7 +15,6 @@ msm-y := \
adreno/a6xx_gpu.o \
adreno/a6xx_gmu.o \
adreno/a6xx_hfi.o \
adreno/a6xx_gpu_state.o \
hdmi/hdmi.o \
hdmi/hdmi_audio.o \
hdmi/hdmi_bridge.o \
......@@ -96,6 +95,8 @@ msm-y := \
msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o
msm-$(CONFIG_DRM_MSM_GPU_STATE) += adreno/a6xx_gpu_state.o
msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o
msm-$(CONFIG_COMMON_CLK) += disp/mdp4/mdp4_lvds_pll.o
msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o
......
......@@ -15,9 +15,6 @@
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
......@@ -30,94 +27,6 @@ static void a5xx_dump(struct msm_gpu *gpu);
#define GPU_PAS_ID 13
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
struct device *dev = &gpu->pdev->dev;
const struct firmware *fw;
struct device_node *np;
struct resource r;
phys_addr_t mem_phys;
ssize_t mem_size;
void *mem_region = NULL;
int ret;
if (!IS_ENABLED(CONFIG_ARCH_QCOM))
return -EINVAL;
np = of_get_child_by_name(dev->of_node, "zap-shader");
if (!np)
return -ENODEV;
np = of_parse_phandle(np, "memory-region", 0);
if (!np)
return -EINVAL;
ret = of_address_to_resource(np, 0, &r);
if (ret)
return ret;
mem_phys = r.start;
mem_size = resource_size(&r);
/* Request the MDT file for the firmware */
fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
if (IS_ERR(fw)) {
DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
return PTR_ERR(fw);
}
/* Figure out how much memory we need */
mem_size = qcom_mdt_get_size(fw);
if (mem_size < 0) {
ret = mem_size;
goto out;
}
/* Allocate memory for the firmware image */
mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
if (!mem_region) {
ret = -ENOMEM;
goto out;
}
/*
* Load the rest of the MDT
*
* Note that we could be dealing with two different paths, since
* with upstream linux-firmware it would be in a qcom/ subdir..
* adreno_request_fw() handles this, but qcom_mdt_load() does
* not. But since we've already gotten thru adreno_request_fw()
* we know which of the two cases it is:
*/
if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
mem_region, mem_phys, mem_size, NULL);
} else {
char *newname;
newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
mem_region, mem_phys, mem_size, NULL);
kfree(newname);
}
if (ret)
goto out;
/* Send the image to the secure world */
ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
if (ret)
DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
out:
if (mem_region)
memunmap(mem_region);
release_firmware(fw);
return ret;
}
static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
......@@ -563,8 +472,6 @@ static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
static bool loaded;
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct platform_device *pdev = gpu->pdev;
int ret;
/*
......@@ -574,23 +481,9 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
if (loaded)
return a5xx_zap_shader_resume(gpu);
/* We need SCM to be able to load the firmware */
if (!qcom_scm_is_available()) {
DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
return -EPROBE_DEFER;
}
/* Each GPU has a target specific zap shader firmware name to use */
if (!adreno_gpu->info->zapfw) {
DRM_DEV_ERROR(&pdev->dev,
"Zap shader firmware file not specified for this target\n");
return -ENODEV;
}
ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
loaded = !ret;
return ret;
}
......
......@@ -27,9 +27,6 @@ struct a6xx_gmu_bo {
/* the GMU is coming up for the first time or back from a power collapse */
#define GMU_COLD_BOOT 1
/* The GMU is being soft reset after a fault */
#define GMU_RESET 2
/*
* These define the level of control that the GMU has - the higher the number
* the more things that the GMU hardware controls on its own.
......@@ -52,11 +49,11 @@ struct a6xx_gmu {
int hfi_irq;
int gmu_irq;
struct regulator *gx;
struct iommu_domain *domain;
u64 uncached_iova_base;
struct device *gxpd;
int idle_level;
struct a6xx_gmu_bo *hfi;
......@@ -78,7 +75,7 @@ struct a6xx_gmu {
struct a6xx_hfi_queue queues[2];
struct tasklet_struct hfi_tasklet;
bool hung;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
......
......@@ -10,6 +10,8 @@
#include <linux/devfreq.h>
#define GPU_PAS_ID 13
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
......@@ -343,6 +345,20 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
return 0;
}
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
static bool loaded;
int ret;
if (loaded)
return 0;
ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
loaded = !ret;
return ret;
}
#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
......@@ -491,7 +507,27 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
if (ret)
goto out;
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
/*
* Try to load a zap shader into the secure world. If successful
* we can use the CP to switch out of secure mode. If not then we
have no recourse but to try to switch ourselves out manually. If we
* guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
* be blocked and a permissions violation will soon follow.
*/
ret = a6xx_zap_shader_init(gpu);
if (!ret) {
OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
OUT_RING(gpu->rb[0], 0x00000000);
a6xx_flush(gpu, gpu->rb[0]);
if (!a6xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
} else {
/* Print a warning so if we die, we know why */
dev_warn_once(gpu->dev->dev,
"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
}
out:
/*
......@@ -678,13 +714,15 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
int ret;
ret = a6xx_gmu_resume(a6xx_gpu);
gpu->needs_hw_init = true;
ret = a6xx_gmu_resume(a6xx_gpu);
if (ret)
return ret;
msm_gpu_resume_devfreq(gpu);
return ret;
return 0;
}
static int a6xx_pm_suspend(struct msm_gpu *gpu)
......@@ -694,18 +732,6 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
devfreq_suspend_device(gpu->devfreq.devfreq);
/*
* Make sure the GMU is idle before continuing (because some transitions
* may use VBIF
*/
a6xx_gmu_wait_for_idle(a6xx_gpu);
/* Clear the VBIF pipe before shutting down */
/* FIXME: This accesses the GPU - do we need to make sure it is on? */
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf);
gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
return a6xx_gmu_stop(a6xx_gpu);
}
......@@ -781,14 +807,16 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = a6xx_active_ring,
.irq = a6xx_irq,
.destroy = a6xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
#if defined(CONFIG_DRM_MSM_GPU_STATE)
.show = a6xx_show,
#endif
.gpu_busy = a6xx_gpu_busy,
.gpu_get_freq = a6xx_gmu_get_freq,
.gpu_set_freq = a6xx_gmu_set_freq,
#if defined(CONFIG_DRM_MSM_GPU_STATE)
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
#endif
},
.get_timestamp = a6xx_get_timestamp,
};
......
......@@ -46,9 +46,8 @@ struct a6xx_gpu {
int a6xx_gmu_resume(struct a6xx_gpu *gpu);
int a6xx_gmu_stop(struct a6xx_gpu *gpu);
int a6xx_gmu_wait_for_idle(struct a6xx_gpu *gpu);
int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu);
int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu);
bool a6xx_gmu_isidle(struct a6xx_gmu *gmu);
int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
......
......@@ -155,6 +155,7 @@ static const struct adreno_info gpulist[] = {
.gmem = SZ_1M,
.inactive_period = DRM_MSM_INACTIVE_PERIOD,
.init = a6xx_gpu_init,
.zapfw = "a630_zap.mdt",
},
};
......@@ -229,6 +230,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0) {
pm_runtime_put_sync(&pdev->dev);
DRM_DEV_ERROR(dev->dev, "Couldn't power up the GPU: %d\n", ret);
return NULL;
}
......
......@@ -19,13 +19,148 @@
#include <linux/ascii85.h>
#include <linux/interconnect.h>
#include <linux/qcom_scm.h>
#include <linux/kernel.h>
#include <linux/of_address.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
static bool zap_available = true;
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
u32 pasid)
{
struct device *dev = &gpu->pdev->dev;
const struct firmware *fw;
struct device_node *np, *mem_np;
struct resource r;
phys_addr_t mem_phys;
ssize_t mem_size;
void *mem_region = NULL;
int ret;
if (!IS_ENABLED(CONFIG_ARCH_QCOM)) {
zap_available = false;
return -EINVAL;
}
np = of_get_child_by_name(dev->of_node, "zap-shader");
if (!np) {
zap_available = false;
return -ENODEV;
}
mem_np = of_parse_phandle(np, "memory-region", 0);
of_node_put(np);
if (!mem_np) {
zap_available = false;
return -EINVAL;
}
ret = of_address_to_resource(mem_np, 0, &r);
of_node_put(mem_np);
if (ret)
return ret;
mem_phys = r.start;
mem_size = resource_size(&r);
/* Request the MDT file for the firmware */
fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
if (IS_ERR(fw)) {
DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
return PTR_ERR(fw);
}
/* Figure out how much memory we need */
mem_size = qcom_mdt_get_size(fw);
if (mem_size < 0) {
ret = mem_size;
goto out;
}
/* Allocate memory for the firmware image */
mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
if (!mem_region) {
ret = -ENOMEM;
goto out;
}
/*
* Load the rest of the MDT
*
* Note that we could be dealing with two different paths, since
* with upstream linux-firmware it would be in a qcom/ subdir..
* adreno_request_fw() handles this, but qcom_mdt_load() does
* not. But since we've already gotten through adreno_request_fw()
* we know which of the two cases it is:
*/
if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
ret = qcom_mdt_load(dev, fw, fwname, pasid,
mem_region, mem_phys, mem_size, NULL);
} else {
char *newname;
newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
ret = qcom_mdt_load(dev, fw, newname, pasid,
mem_region, mem_phys, mem_size, NULL);
kfree(newname);
}
if (ret)
goto out;
/* Send the image to the secure world */
ret = qcom_scm_pas_auth_and_reset(pasid);
/*
* If the scm call returns -EOPNOTSUPP we assume that this target
* doesn't need/support the zap shader so quietly fail
*/
if (ret == -EOPNOTSUPP)
zap_available = false;
else if (ret)
DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
out:
if (mem_region)
memunmap(mem_region);
release_firmware(fw);
return ret;
}
int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct platform_device *pdev = gpu->pdev;
/* Short cut if we determine the zap shader isn't available/needed */
if (!zap_available)
return -ENODEV;
/* We need SCM to be able to load the firmware */
if (!qcom_scm_is_available()) {
DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
return -EPROBE_DEFER;
}
/* Each GPU has a target specific zap shader firmware name to use */
if (!adreno_gpu->info->zapfw) {
zap_available = false;
DRM_DEV_ERROR(&pdev->dev,
"Zap shader firmware file not specified for this target\n");
return -ENODEV;
}
return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
}
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
......@@ -63,6 +198,12 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
case MSM_PARAM_NR_RINGS:
*value = gpu->nr_rings;
return 0;
case MSM_PARAM_PP_PGTABLE:
*value = 0;
return 0;
case MSM_PARAM_FAULTS:
*value = gpu->global_faults;
return 0;
default:
DBG("%s: invalid param: %u", gpu->name, param);
return -EINVAL;
......
......@@ -252,6 +252,12 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state);
int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
int adreno_gpu_state_put(struct msm_gpu_state *state);
/*
* For a5xx and a6xx targets load the zap shader that is used to pull the GPU
* out of secure mode
*/
int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid);
/* ringbuffer helpers (the parts that are adreno specific) */
static inline void
......
......@@ -46,6 +46,9 @@
#define LEFT_MIXER 0
#define RIGHT_MIXER 1
/* timeout in ms waiting for frame done */
#define DPU_CRTC_FRAME_DONE_TIMEOUT_MS 60
static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
{
struct msm_drm_private *priv = crtc->dev->dev_private;
......@@ -425,65 +428,6 @@ void dpu_crtc_complete_commit(struct drm_crtc *crtc,
trace_dpu_crtc_complete_commit(DRMID(crtc));
}
static void _dpu_crtc_setup_mixer_for_encoder(
struct drm_crtc *crtc,
struct drm_encoder *enc)
{
struct dpu_crtc_state *cstate = to_dpu_crtc_state(crtc->state);
struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc);
struct dpu_rm *rm = &dpu_kms->rm;
struct dpu_crtc_mixer *mixer;
struct dpu_hw_ctl *last_valid_ctl = NULL;
int i;
struct dpu_rm_hw_iter lm_iter, ctl_iter;
dpu_rm_init_hw_iter(&lm_iter, enc->base.id, DPU_HW_BLK_LM);
dpu_rm_init_hw_iter(&ctl_iter, enc->base.id, DPU_HW_BLK_CTL);
/* Set up all the mixers and ctls reserved by this encoder */
for (i = cstate->num_mixers; i < ARRAY_SIZE(cstate->mixers); i++) {
mixer = &cstate->mixers[i];
if (!dpu_rm_get_hw(rm, &lm_iter))
break;
mixer->hw_lm = (struct dpu_hw_mixer *)lm_iter.hw;
/* CTL may be <= LMs, if <, multiple LMs controlled by 1 CTL */
if (!dpu_rm_get_hw(rm, &ctl_iter)) {
DPU_DEBUG("no ctl assigned to lm %d, using previous\n",
mixer->hw_lm->idx - LM_0);
mixer->lm_ctl = last_valid_ctl;
} else {
mixer->lm_ctl = (struct dpu_hw_ctl *)ctl_iter.hw;
last_valid_ctl = mixer->lm_ctl;
}
/* Shouldn't happen, mixers are always >= ctls */
if (!mixer->lm_ctl) {
DPU_ERROR("no valid ctls found for lm %d\n",
mixer->hw_lm->idx - LM_0);
return;
}
cstate->num_mixers++;
DPU_DEBUG("setup mixer %d: lm %d\n",
i, mixer->hw_lm->idx - LM_0);
DPU_DEBUG("setup mixer %d: ctl %d\n",
i, mixer->lm_ctl->idx - CTL_0);
}
}
static void _dpu_crtc_setup_mixers(struct drm_crtc *crtc)
{
struct drm_encoder *enc;
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
/* Check for mixers on all encoders attached to this crtc */
drm_for_each_encoder_mask(enc, crtc->dev, crtc->state->encoder_mask)
_dpu_crtc_setup_mixer_for_encoder(crtc, enc);
}
static void _dpu_crtc_setup_lm_bounds(struct drm_crtc *crtc,
struct drm_crtc_state *state)
{
......@@ -533,10 +477,7 @@ static void dpu_crtc_atomic_begin(struct drm_crtc *crtc,
dev = crtc->dev;
smmu_state = &dpu_crtc->smmu_state;
if (!cstate->num_mixers) {
_dpu_crtc_setup_mixers(crtc);
_dpu_crtc_setup_lm_bounds(crtc, crtc->state);
}
_dpu_crtc_setup_lm_bounds(crtc, crtc->state);
if (dpu_crtc->event) {
WARN_ON(dpu_crtc->event);
......@@ -683,7 +624,7 @@ static int _dpu_crtc_wait_for_frame_done(struct drm_crtc *crtc)
DPU_ATRACE_BEGIN("frame done completion wait");
ret = wait_for_completion_timeout(&dpu_crtc->frame_done_comp,
msecs_to_jiffies(DPU_FRAME_DONE_TIMEOUT));
msecs_to_jiffies(DPU_CRTC_FRAME_DONE_TIMEOUT_MS));
if (!ret) {
DRM_ERROR("frame done wait timed out, ret:%d\n", ret);
rc = -ETIMEDOUT;
......
......@@ -69,6 +69,9 @@
#define MAX_VDISPLAY_SPLIT 1080
/* timeout in frames waiting for frame done */
#define DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES 5
/**
* enum dpu_enc_rc_events - events for resource control state machine
* @DPU_ENC_RC_EVENT_KICKOFF:
......@@ -158,7 +161,7 @@ enum dpu_enc_rc_states {
* Bit0 = phys_encs[0] etc.
* @crtc_frame_event_cb: callback handler for frame event
* @crtc_frame_event_cb_data: callback handler private data
* @frame_done_timeout: frame done timeout in Hz
* @frame_done_timeout_ms: frame done timeout in ms
* @frame_done_timer: watchdog timer for frame done event
* @vsync_event_timer: vsync timer
* @disp_info: local copy of msm_display_info struct
......@@ -196,7 +199,7 @@ struct dpu_encoder_virt {
void (*crtc_frame_event_cb)(void *, u32 event);
void *crtc_frame_event_cb_data;
atomic_t frame_done_timeout;
atomic_t frame_done_timeout_ms;
struct timer_list frame_done_timer;
struct timer_list vsync_event_timer;
......@@ -520,8 +523,8 @@ static void _dpu_encoder_adjust_mode(struct drm_connector *connector,
list_for_each_entry(cur_mode, &connector->modes, head) {
if (cur_mode->vdisplay == adj_mode->vdisplay &&
cur_mode->hdisplay == adj_mode->hdisplay &&
cur_mode->vrefresh == adj_mode->vrefresh) {
cur_mode->hdisplay == adj_mode->hdisplay &&
drm_mode_vrefresh(cur_mode) == drm_mode_vrefresh(adj_mode)) {
adj_mode->private = cur_mode->private;
adj_mode->private_flags |= cur_mode->private_flags;
}
......@@ -959,10 +962,14 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
struct dpu_kms *dpu_kms;
struct list_head *connector_list;
struct drm_connector *conn = NULL, *conn_iter;
struct dpu_rm_hw_iter pp_iter, ctl_iter;
struct drm_crtc *drm_crtc;
struct dpu_crtc_state *cstate;
struct dpu_rm_hw_iter hw_iter;
struct msm_display_topology topology;
struct dpu_hw_ctl *hw_ctl[MAX_CHANNELS_PER_ENC] = { NULL };
int i = 0, ret;
struct dpu_hw_mixer *hw_lm[MAX_CHANNELS_PER_ENC] = { NULL };
int num_lm = 0, num_ctl = 0;
int i, j, ret;
if (!drm_enc) {
DPU_ERROR("invalid encoder\n");
......@@ -990,10 +997,14 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
return;
}
drm_for_each_crtc(drm_crtc, drm_enc->dev)
if (drm_crtc->state->encoder_mask & drm_encoder_mask(drm_enc))
break;
topology = dpu_encoder_get_topology(dpu_enc, dpu_kms, adj_mode);
/* Reserve dynamic resources now. Indicating non-AtomicTest phase */
ret = dpu_rm_reserve(&dpu_kms->rm, drm_enc, drm_enc->crtc->state,
ret = dpu_rm_reserve(&dpu_kms->rm, drm_enc, drm_crtc->state,
topology, false);
if (ret) {
DPU_ERROR_ENC(dpu_enc,
......@@ -1001,21 +1012,41 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
return;
}
dpu_rm_init_hw_iter(&pp_iter, drm_enc->base.id, DPU_HW_BLK_PINGPONG);
dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id, DPU_HW_BLK_PINGPONG);
for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) {
dpu_enc->hw_pp[i] = NULL;
if (!dpu_rm_get_hw(&dpu_kms->rm, &pp_iter))
if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
break;
dpu_enc->hw_pp[i] = (struct dpu_hw_pingpong *) hw_iter.hw;
}
dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id, DPU_HW_BLK_CTL);
for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) {
if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
break;
dpu_enc->hw_pp[i] = (struct dpu_hw_pingpong *) pp_iter.hw;
hw_ctl[i] = (struct dpu_hw_ctl *)hw_iter.hw;
num_ctl++;
}
dpu_rm_init_hw_iter(&ctl_iter, drm_enc->base.id, DPU_HW_BLK_CTL);
dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id, DPU_HW_BLK_LM);
for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) {
if (!dpu_rm_get_hw(&dpu_kms->rm, &ctl_iter))
if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
break;
hw_ctl[i] = (struct dpu_hw_ctl *)ctl_iter.hw;
hw_lm[i] = (struct dpu_hw_mixer *)hw_iter.hw;
num_lm++;
}
cstate = to_dpu_crtc_state(drm_crtc->state);
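/* CTLs may be fewer than LMs; if so, the last CTL drives the remaining LMs: */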
for (i = 0; i < num_lm; i++) {
int ctl_idx = (i < num_ctl) ? i : (num_ctl-1);
cstate->mixers[i].hw_lm = hw_lm[i];
cstate->mixers[i].lm_ctl = hw_ctl[ctl_idx];
}
cstate->num_mixers = num_lm;
for (i = 0; i < dpu_enc->num_phys_encs; i++) {
struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
......@@ -1023,18 +1054,38 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
if (!dpu_enc->hw_pp[i]) {
DPU_ERROR_ENC(dpu_enc, "no pp block assigned"
"at idx: %d\n", i);
return;
goto error;
}
if (!hw_ctl[i]) {
DPU_ERROR_ENC(dpu_enc, "no ctl block assigned"
"at idx: %d\n", i);
return;
goto error;
}
phys->hw_pp = dpu_enc->hw_pp[i];
phys->hw_ctl = hw_ctl[i];
dpu_rm_init_hw_iter(&hw_iter, drm_enc->base.id,
DPU_HW_BLK_INTF);
for (j = 0; j < MAX_CHANNELS_PER_ENC; j++) {
struct dpu_hw_intf *hw_intf;
if (!dpu_rm_get_hw(&dpu_kms->rm, &hw_iter))
break;
hw_intf = (struct dpu_hw_intf *)hw_iter.hw;
if (hw_intf->idx == phys->intf_idx)
phys->hw_intf = hw_intf;
}
if (!phys->hw_intf) {
DPU_ERROR_ENC(dpu_enc,
"no intf block assigned at idx: %d\n",
i);
goto error;
}
phys->connector = conn->state->connector;
if (phys->ops.mode_set)
phys->ops.mode_set(phys, mode, adj_mode);
......@@ -1042,6 +1093,9 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
}
dpu_enc->mode_set_complete = true;
error:
dpu_rm_release(&dpu_kms->rm, drm_enc);
}
static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc)
......@@ -1182,7 +1236,7 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
}
/* after phys waits for frame-done, should be no more frames pending */
if (atomic_xchg(&dpu_enc->frame_done_timeout, 0)) {
if (atomic_xchg(&dpu_enc->frame_done_timeout_ms, 0)) {
DPU_ERROR("enc%d timeout pending\n", drm_enc->base.id);
del_timer_sync(&dpu_enc->frame_done_timer);
}
......@@ -1339,7 +1393,7 @@ static void dpu_encoder_frame_done_callback(
}
if (!dpu_enc->frame_busy_mask[0]) {
atomic_set(&dpu_enc->frame_done_timeout, 0);
atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
del_timer(&dpu_enc->frame_done_timer);
dpu_encoder_resource_control(drm_enc,
......@@ -1547,8 +1601,14 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc,
if (!ctl)
continue;
if (phys->split_role != ENC_ROLE_SLAVE)
/*
* This is cleared in frame_done worker, which isn't invoked
* for async commits. So don't set this for async, since it'll
* roll over to the next commit.
*/
if (!async && phys->split_role != ENC_ROLE_SLAVE)
set_bit(i, dpu_enc->frame_busy_mask);
if (!phys->ops.needs_single_flush ||
!phys->ops.needs_single_flush(phys))
_dpu_encoder_trigger_flush(&dpu_enc->base, phys, 0x0,
......@@ -1800,11 +1860,20 @@ void dpu_encoder_kickoff(struct drm_encoder *drm_enc, bool async)
trace_dpu_enc_kickoff(DRMID(drm_enc));
atomic_set(&dpu_enc->frame_done_timeout,
DPU_FRAME_DONE_TIMEOUT * 1000 /
drm_enc->crtc->state->adjusted_mode.vrefresh);
mod_timer(&dpu_enc->frame_done_timer, jiffies +
((atomic_read(&dpu_enc->frame_done_timeout) * HZ) / 1000));
/*
* Asynchronous frames don't handle FRAME_DONE events. As such, they
* shouldn't enable the frame_done watchdog since it will always time
* out.
*/
if (!async) {
unsigned long timeout_ms;
timeout_ms = DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES * 1000 /
drm_mode_vrefresh(&drm_enc->crtc->state->adjusted_mode);
atomic_set(&dpu_enc->frame_done_timeout_ms, timeout_ms);
mod_timer(&dpu_enc->frame_done_timer,
jiffies + msecs_to_jiffies(timeout_ms));
}
/* All phys encs are ready to go, trigger the kickoff */
_dpu_encoder_kickoff_phys(dpu_enc, async);
......@@ -2124,7 +2193,7 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t)
DRM_DEBUG_KMS("id:%u invalid timeout frame_busy_mask=%lu\n",
DRMID(drm_enc), dpu_enc->frame_busy_mask[0]);
return;
} else if (!atomic_xchg(&dpu_enc->frame_done_timeout, 0)) {
} else if (!atomic_xchg(&dpu_enc->frame_done_timeout_ms, 0)) {
DRM_DEBUG_KMS("id:%u invalid timeout\n", DRMID(drm_enc));
return;
}
......@@ -2170,7 +2239,7 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc,
spin_lock_init(&dpu_enc->enc_spinlock);
atomic_set(&dpu_enc->frame_done_timeout, 0);
atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
timer_setup(&dpu_enc->frame_done_timer,
dpu_encoder_frame_done_timeout, 0);
......
......@@ -200,6 +200,7 @@ struct dpu_encoder_irq {
* @hw_mdptop: Hardware interface to the top registers
* @hw_ctl: Hardware interface to the ctl registers
* @hw_pp: Hardware interface to the ping pong registers
* @hw_intf: Hardware interface to the intf registers
* @dpu_kms: Pointer to the dpu_kms top level
* @cached_mode: DRM mode cached at mode_set time, acted on in enable
* @enabled: Whether the encoder has enabled and running a mode
......@@ -228,6 +229,7 @@ struct dpu_encoder_phys {
struct dpu_hw_mdp *hw_mdptop;
struct dpu_hw_ctl *hw_ctl;
struct dpu_hw_pingpong *hw_pp;
struct dpu_hw_intf *hw_intf;
struct dpu_kms *dpu_kms;
struct drm_display_mode cached_mode;
enum dpu_enc_split_role split_role;
......@@ -250,19 +252,6 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
return atomic_inc_return(&phys->pending_kickoff_cnt);
}
/**
* struct dpu_encoder_phys_vid - sub-class of dpu_encoder_phys to handle video
* mode specific operations
* @base: Baseclass physical encoder structure
* @hw_intf: Hardware interface to the intf registers
* @timing_params: Current timing parameter
*/
struct dpu_encoder_phys_vid {
struct dpu_encoder_phys base;
struct dpu_hw_intf *hw_intf;
struct intf_timing_params timing_params;
};
/**
* struct dpu_encoder_phys_cmd - sub-class of dpu_encoder_phys to handle command
* mode specific operations
......
......@@ -404,7 +404,8 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
return;
}
tc_cfg.vsync_count = vsync_hz / (mode->vtotal * mode->vrefresh);
tc_cfg.vsync_count = vsync_hz /
(mode->vtotal * drm_mode_vrefresh(mode));
/* enable external TE after kickoff to avoid premature autorefresh */
tc_cfg.hw_vsync_mode = 0;
......@@ -424,7 +425,7 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
DPU_DEBUG_CMDENC(cmd_enc,
"tc %d vsync_clk_speed_hz %u vtotal %u vrefresh %u\n",
phys_enc->hw_pp->idx - PINGPONG_0, vsync_hz,
mode->vtotal, mode->vrefresh);
mode->vtotal, drm_mode_vrefresh(mode));
DPU_DEBUG_CMDENC(cmd_enc,
"tc %d enable %u start_pos %u rd_ptr_irq %u\n",
phys_enc->hw_pp->idx - PINGPONG_0, tc_enable, tc_cfg.start_pos,
......
......@@ -73,9 +73,6 @@
#define DPU_NAME_SIZE 12
/* timeout in frames waiting for frame done */
#define DPU_FRAME_DONE_TIMEOUT 60
/*
* struct dpu_irq_callback - IRQ callback handlers
* @list: list to callback
......
......@@ -387,7 +387,7 @@ static void _dpu_plane_set_ot_limit(struct drm_plane *plane,
ot_params.width = drm_rect_width(&pdpu->pipe_cfg.src_rect);
ot_params.height = drm_rect_height(&pdpu->pipe_cfg.src_rect);
ot_params.is_wfd = !pdpu->is_rt_pipe;
ot_params.frame_rate = crtc->mode.vrefresh;
ot_params.frame_rate = drm_mode_vrefresh(&crtc->mode);
ot_params.vbif_idx = VBIF_RT;
ot_params.clk_ctrl = pdpu->pipe_hw->cap->clk_ctrl;
ot_params.rd = true;
......
......@@ -59,10 +59,10 @@ static int pingpong_tearcheck_setup(struct drm_encoder *encoder,
return -EINVAL;
}
total_lines_x100 = mode->vtotal * mode->vrefresh;
total_lines_x100 = mode->vtotal * drm_mode_vrefresh(mode);
if (!total_lines_x100) {
DRM_DEV_ERROR(dev, "%s: vtotal(%d) or vrefresh(%d) is 0\n",
__func__, mode->vtotal, mode->vrefresh);
__func__, mode->vtotal, drm_mode_vrefresh(mode));
return -EINVAL;
}
......
......@@ -75,7 +75,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
struct msm_gpu_show_priv *show_priv;
int ret;
if (!gpu)
if (!gpu || !gpu->funcs->gpu_state_get)
return -ENODEV;
show_priv = kmalloc(sizeof(*show_priv), GFP_KERNEL);
......
......@@ -39,9 +39,10 @@
* MSM_GEM_INFO ioctl.
* - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
* GEM object's debug name
* - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
*/
#define MSM_VERSION_MAJOR 1
#define MSM_VERSION_MINOR 4
#define MSM_VERSION_MINOR 5
#define MSM_VERSION_PATCHLEVEL 0
static const struct drm_mode_config_funcs mode_config_funcs = {
......@@ -457,6 +458,9 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
priv->wq = alloc_ordered_workqueue("msm", 0);
INIT_WORK(&priv->free_work, msm_gem_free_work);
init_llist_head(&priv->free_list);
INIT_LIST_HEAD(&priv->inactive_list);
drm_mode_config_init(ddev);
......@@ -964,6 +968,11 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
args->flags, &args->id);
}
static int msm_ioctl_submitqueue_query(struct drm_device *dev, void *data,
struct drm_file *file)
{
return msm_submitqueue_query(dev, file->driver_priv, data);
}
static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
struct drm_file *file)
......@@ -984,6 +993,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE, msm_ioctl_gem_madvise, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct vm_operations_struct vm_ops = {
......@@ -1019,7 +1029,7 @@ static struct drm_driver msm_driver = {
.irq_uninstall = msm_irq_uninstall,
.enable_vblank = msm_enable_vblank,
.disable_vblank = msm_disable_vblank,
.gem_free_object = msm_gem_free_object,
.gem_free_object_unlocked = msm_gem_free_object,
.gem_vm_ops = &vm_ops,
.dumb_create = msm_gem_dumb_create,
.dumb_map_offset = msm_gem_dumb_map_offset,
......
......@@ -185,6 +185,10 @@ struct msm_drm_private {
/* list of GEM objects: */
struct list_head inactive_list;
/* worker for delayed free of objects: */
struct work_struct free_work;
struct llist_head free_list;
struct workqueue_struct *wq;
unsigned int num_planes;
......@@ -324,6 +328,7 @@ void msm_gem_kernel_put(struct drm_gem_object *bo,
struct msm_gem_address_space *aspace, bool locked);
struct drm_gem_object *msm_gem_import(struct drm_device *dev,
struct dma_buf *dmabuf, struct sg_table *sgt);
void msm_gem_free_work(struct work_struct *work);
__printf(2, 3)
void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...);
......@@ -419,6 +424,8 @@ struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
u32 id);
int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);
......
......@@ -851,8 +851,18 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
/* don't call directly! Use drm_gem_object_put() and friends */
void msm_gem_free_object(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
struct drm_device *dev = obj->dev;
struct msm_drm_private *priv = dev->dev_private;
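/* llist_add() returns true only when the list was previously empty, so
 * the worker is scheduled just once per batch of freed objects:
 */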
if (llist_add(&msm_obj->freed, &priv->free_list))
queue_work(priv->wq, &priv->free_work);
}
static void free_object(struct msm_gem_object *msm_obj)
{
struct drm_gem_object *obj = &msm_obj->base;
struct drm_device *dev = obj->dev;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
......@@ -887,6 +897,29 @@ void msm_gem_free_object(struct drm_gem_object *obj)
kfree(msm_obj);
}
void msm_gem_free_work(struct work_struct *work)
{
struct msm_drm_private *priv =
container_of(work, struct msm_drm_private, free_work);
struct drm_device *dev = priv->dev;
struct llist_node *freed;
struct msm_gem_object *msm_obj, *next;
while ((freed = llist_del_all(&priv->free_list))) {
mutex_lock(&dev->struct_mutex);
llist_for_each_entry_safe(msm_obj, next,
freed, freed)
free_object(msm_obj);
mutex_unlock(&dev->struct_mutex);
if (need_resched())
break;
}
}
/* convenience method to construct a GEM buffer object, and userspace handle */
int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
uint32_t size, uint32_t flags, uint32_t *handle,
......@@ -1017,6 +1050,13 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
ret = drm_gem_object_init(dev, obj, size);
if (ret)
goto fail;
/*
* Our buffers are kept pinned, so allocating them from the
* MOVABLE zone is a really bad idea, and conflicts with CMA.
* See comments above new_inode() why this is required _and_
* expected if you're going to pin these pages.
*/
mapping_set_gfp_mask(obj->filp->f_mapping, GFP_HIGHUSER);
}
return obj;
......
......@@ -84,6 +84,8 @@ struct msm_gem_object {
struct list_head vmas; /* list of msm_gem_vma */
struct llist_node freed;
/* normally (resv == &_resv) except for imported bo's */
struct reservation_object *resv;
struct reservation_object _resv;
......@@ -133,6 +135,7 @@ enum msm_gem_lock {
void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass);
void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass);
void msm_gem_free_work(struct work_struct *work);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
* associated with the cmdstream submission for synchronization (and
......@@ -163,7 +166,10 @@ struct msm_gem_submit {
} *cmd; /* array of size nr_cmds */
struct {
uint32_t flags;
struct msm_gem_object *obj;
union {
struct msm_gem_object *obj;
uint32_t handle;
};
uint64_t iova;
} bos[0];
};
......
......@@ -74,27 +74,14 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
kfree(submit);
}
static inline unsigned long __must_check
copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
{
if (access_ok(from, n))
return __copy_from_user_inatomic(to, from, n);
return -EFAULT;
}
static int submit_lookup_objects(struct msm_gem_submit *submit,
struct drm_msm_gem_submit *args, struct drm_file *file)
{
unsigned i;
int ret = 0;
spin_lock(&file->table_lock);
pagefault_disable();
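/* Copy the full bo table from userspace up front; copy_from_user() can
 * fault, so this is now done before taking table_lock:
 */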
for (i = 0; i < args->nr_bos; i++) {
struct drm_msm_gem_submit_bo submit_bo;
struct drm_gem_object *obj;
struct msm_gem_object *msm_obj;
void __user *userptr =
u64_to_user_ptr(args->bos + (i * sizeof(submit_bo)));
......@@ -103,15 +90,10 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
*/
submit->bos[i].flags = 0;
if (copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo))) {
pagefault_enable();
spin_unlock(&file->table_lock);
if (copy_from_user(&submit_bo, userptr, sizeof(submit_bo))) {
ret = -EFAULT;
goto out;
}
spin_lock(&file->table_lock);
pagefault_disable();
if (copy_from_user(&submit_bo, userptr, sizeof(submit_bo))) {
ret = -EFAULT;
i = 0;
goto out;
}
/* at least one of READ and/or WRITE flags should be set: */
......@@ -121,19 +103,28 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
!(submit_bo.flags & MANDATORY_FLAGS)) {
DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
ret = -EINVAL;
goto out_unlock;
i = 0;
goto out;
}
submit->bos[i].handle = submit_bo.handle;
submit->bos[i].flags = submit_bo.flags;
/* in validate_objects() we figure out if this is true: */
submit->bos[i].iova = submit_bo.presumed;
}
spin_lock(&file->table_lock);
for (i = 0; i < args->nr_bos; i++) {
struct drm_gem_object *obj;
struct msm_gem_object *msm_obj;
/* normally use drm_gem_object_lookup(), but for bulk lookup
* all under single table_lock just hit object_idr directly:
*/
obj = idr_find(&file->object_idr, submit_bo.handle);
obj = idr_find(&file->object_idr, submit->bos[i].handle);
if (!obj) {
DRM_ERROR("invalid handle %u at index %u\n", submit_bo.handle, i);
DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i);
ret = -EINVAL;
goto out_unlock;
}
......@@ -142,7 +133,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
if (!list_empty(&msm_obj->submit_entry)) {
DRM_ERROR("handle %u at index %u already on submit list\n",
submit_bo.handle, i);
submit->bos[i].handle, i);
ret = -EINVAL;
goto out_unlock;
}
......@@ -155,7 +146,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
}
out_unlock:
pagefault_enable();
spin_unlock(&file->table_lock);
out:
......
......@@ -85,7 +85,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace,
vma->mapped = true;
if (aspace->mmu)
if (aspace && aspace->mmu)
ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt,
size, prot);
......
......@@ -443,24 +443,15 @@ static void recover_worker(struct work_struct *work)
if (submit) {
struct task_struct *task;
/* Increment the fault counts */
gpu->global_faults++;
submit->queue->faults++;
task = get_pid_task(submit->pid, PIDTYPE_PID);
if (task) {
comm = kstrdup(task->comm, GFP_KERNEL);
/*
* So slightly annoying, in other paths like
* mmap'ing gem buffers, mmap_sem is acquired
* before struct_mutex, which means we can't
* hold struct_mutex across the call to
* get_cmdline(). But submits are retired
* from the same in-order workqueue, so we can
* safely drop the lock here without worrying
* about the submit going away.
*/
mutex_unlock(&dev->struct_mutex);
cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
put_task_struct(task);
mutex_lock(&dev->struct_mutex);
}
if (comm && cmd) {
......
......@@ -104,6 +104,9 @@ struct msm_gpu {
/* does gpu need hw_init? */
bool needs_hw_init;
/* number of GPU hangs (for all contexts) */
int global_faults;
/* worker for handling active-list retiring: */
struct work_struct retire_work;
......
......@@ -38,13 +38,8 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names,
int cnt)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
int ret;
pm_runtime_get_sync(mmu->dev);
ret = iommu_attach_device(iommu->domain, mmu->dev);
pm_runtime_put_sync(mmu->dev);
return ret;
return iommu_attach_device(iommu->domain, mmu->dev);
}
static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names,
......@@ -52,9 +47,7 @@ static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names,
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
pm_runtime_get_sync(mmu->dev);
iommu_detach_device(iommu->domain, mmu->dev);
pm_runtime_put_sync(mmu->dev);
}
static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
......@@ -63,9 +56,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
struct msm_iommu *iommu = to_msm_iommu(mmu);
size_t ret;
// pm_runtime_get_sync(mmu->dev);
ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
// pm_runtime_put_sync(mmu->dev);
WARN_ON(!ret);
return (ret == len) ? 0 : -EINVAL;
......@@ -75,9 +66,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint64_t iova, unsigned len)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
pm_runtime_get_sync(mmu->dev);
iommu_unmap(iommu->domain, iova, len);
pm_runtime_put_sync(mmu->dev);
return 0;
}
......
......@@ -120,6 +120,47 @@ int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
}
static int msm_submitqueue_query_faults(struct msm_gpu_submitqueue *queue,
struct drm_msm_submitqueue_query *args)
{
size_t size = min_t(size_t, args->len, sizeof(queue->faults));
int ret;
/* If a zero length was passed in, return the data size we expect */
if (!args->len) {
args->len = sizeof(queue->faults);
return 0;
}
/* Set the length to the actual size of the data */
args->len = size;
ret = copy_to_user(u64_to_user_ptr(args->data), &queue->faults, size);
return ret ? -EFAULT : 0;
}
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
struct drm_msm_submitqueue_query *args)
{
struct msm_gpu_submitqueue *queue;
int ret = -EINVAL;
if (args->pad)
return -EINVAL;
queue = msm_submitqueue_get(ctx, args->id);
if (!queue)
return -ENOENT;
if (args->param == MSM_SUBMITQUEUE_PARAM_FAULTS)
ret = msm_submitqueue_query_faults(queue, args);
msm_submitqueue_put(queue);
return ret;
}
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id)
{
struct msm_gpu_submitqueue *entry;
......
......@@ -74,6 +74,8 @@ struct drm_msm_timespec {
#define MSM_PARAM_TIMESTAMP 0x05
#define MSM_PARAM_GMEM_BASE 0x06
#define MSM_PARAM_NR_RINGS 0x07
#define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */
#define MSM_PARAM_FAULTS 0x09
struct drm_msm_param {
__u32 pipe; /* in, MSM_PIPE_x */
......@@ -286,6 +288,16 @@ struct drm_msm_submitqueue {
__u32 id; /* out, identifier */
};
#define MSM_SUBMITQUEUE_PARAM_FAULTS 0
struct drm_msm_submitqueue_query {
__u64 data;
__u32 id;
__u32 param;
__u32 len;
__u32 pad;
};
#define DRM_MSM_GET_PARAM 0x00
/* placeholder:
#define DRM_MSM_SET_PARAM 0x01
......@@ -302,6 +314,7 @@ struct drm_msm_submitqueue {
*/
#define DRM_MSM_SUBMITQUEUE_NEW 0x0A
#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
#define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
......@@ -313,6 +326,7 @@ struct drm_msm_submitqueue {
#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise)
#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
#if defined(__cplusplus)
}
......
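
Taken together, the robustness additions above give userspace both a global
and a per-queue fault counter. Below is a minimal userspace sketch of querying
them (assumptions: libdrm's drmIoctl(), an already-open render node fd, and
the msm_drm.h from this series; the helper names and the trimmed error
handling are illustrative, not from this commit):

#include <stdint.h>
#include <xf86drm.h>
#include "msm_drm.h"

/* Global fault count across all contexts (gpu->global_faults) */
static int get_global_faults(int fd, uint64_t *faults)
{
	struct drm_msm_param req = {
		.pipe = MSM_PIPE_3D0,
		.param = MSM_PARAM_FAULTS,
	};
	int ret = drmIoctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req);

	if (!ret)
		*faults = req.value;
	return ret;
}

/* Per-submitqueue fault count (queue->faults) */
static int get_queue_faults(int fd, uint32_t queue_id, uint64_t *faults)
{
	struct drm_msm_submitqueue_query q = {
		.data = (uintptr_t)faults,
		.id = queue_id,
		.param = MSM_SUBMITQUEUE_PARAM_FAULTS,
		.len = sizeof(*faults),	/* kernel clamps to its counter size */
	};

	/* passing .len = 0 instead asks the kernel for the expected size */
	return drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_QUERY, &q);
}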