Commit 46040ea8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-2024-09-13' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Regular fixes pull, the amdgpu JPEG engine fixes are probably the
  biggest, they look to block some register accessing, otherwise there
  are just minor fixes and regression fixes all over.

  nouveau had a regression report going back a few kernels that finally
  got fixed, Not entirely happy with so many changes so late, but they
  all seem quite benign apart from the jpeg one.

  dma-buf/heaps:
   - fix off by one in CMA heap fault handler

  syncobj:
   - fix syncobj leak in drm_syncobj_eventfd_ioctl

  amdgpu:
   - Avoid races between set_drr() functions and dc_state_destruct()
   - Fix regerssion related to zpos
   - Fix regression related to overlay cursor
   - SMU 14.x updates
   - JPEG fixes
   - Silence an UBSAN warning

  amdkfd:
   - Fetch cacheline size from IP discovery

  i915:
   - Prevent a possible int overflow in wq offsets

  xe:
   - Remove a double include
   - Fix null checks and UAF
   - Fix access_ok check in user_fence_create
   - Fix compat IS_DISPLAY_STEP() range
   - OA fix
   - Fixes in show_meminfo

  nouveau:
   - fix GP10x regression on boot

  stm:
   - add COMMON_CLK dep

  rockchip:
   - iommu api change

  tegra:
   - iommu api change"

* tag 'drm-fixes-2024-09-13' of https://gitlab.freedesktop.org/drm/kernel: (25 commits)
  drm/xe/client: add missing bo locking in show_meminfo()
  drm/xe/client: fix deadlock in show_meminfo()
  drm/xe/oa: Enable Xe2+ PES disaggregation
  drm/xe/display: fix compat IS_DISPLAY_STEP() range end
  drm/xe: Fix access_ok check in user_fence_create
  drm/xe: Fix possible UAF in guc_exec_queue_process_msg
  drm/xe: Remove fence check from send_tlb_invalidation
  drm/xe/gt: Remove double include
  drm/amd/display: Add all planes on CRTC to state for overlay cursor
  drm/amdgpu/atomfirmware: Silence UBSAN warning
  drm/amd/amdgpu: apply command submission parser for JPEG v1
  drm/amd/amdgpu: apply command submission parser for JPEG v2+
  drm/amd/pm: fix the pp_dpm_pcie issue on smu v14.0.2/3
  drm/amd/pm: update the features set on smu v14.0.2/3
  drm/amd/display: Do not reset planes based on crtc zpos_changed
  drm/amd/display: Avoid race between dcn35_set_drr() and dc_state_destruct()
  drm/amd/display: Avoid race between dcn10_set_drr() and dc_state_destruct()
  drm/amdkfd: Add cache line size info
  drm/tegra: Use iommu_paging_domain_alloc()
  drm/rockchip: Use iommu_paging_domain_alloc()
  ...
parents 196145c6 135be1dc
......@@ -165,7 +165,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct cma_heap_buffer *buffer = vma->vm_private_data;
if (vmf->pgoff > buffer->pagecount)
if (vmf->pgoff >= buffer->pagecount)
return VM_FAULT_SIGBUS;
return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff]));
......
......@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
......@@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
......@@ -300,6 +304,9 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
if (ring->funcs->parse_cs)
amdgpu_ring_write(ring, 0);
else
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
......@@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
.parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
......@@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
/**
* jpeg_v1_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
u32 i, reg, res, cond, type;
int ret = 0;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
return -EINVAL;
if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
continue;
switch (type) {
case PACKETJ_TYPE0:
if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
reg != JPEG_V1_REG_CTX_INDEX &&
reg != JPEG_V1_REG_CTX_DATA) {
ret = -EINVAL;
}
break;
case PACKETJ_TYPE1:
if (reg != JPEG_V1_REG_CTX_DATA)
ret = -EINVAL;
break;
case PACKETJ_TYPE3:
if (reg != JPEG_V1_REG_SOFT_RESET)
ret = -EINVAL;
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] != CP_PACKETJ_NOP)
ret = -EINVAL;
break;
default:
ret = -EINVAL;
}
if (ret) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
break;
}
}
return ret;
}
......@@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle);
void jpeg_v1_0_sw_fini(void *handle);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
#define JPEG_V1_REG_RANGE_START 0x8000
#define JPEG_V1_REG_RANGE_END 0x803f
#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
#define JPEG_V1_REG_CTX_INDEX 0x8328
#define JPEG_V1_REG_CTX_DATA 0x8329
#define JPEG_V1_REG_SOFT_RESET 0x83a0
#endif /*__JPEG_V1_0_H__*/
......@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
......@@ -538,6 +539,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
if (ring->funcs->parse_cs)
amdgpu_ring_write(ring, 0);
else
amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
......@@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......@@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.rev = 0,
.funcs = &jpeg_v2_0_ip_funcs,
};
/**
* jpeg_v2_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
u32 i, reg, res, cond, type;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res) /* only support 0 at the moment */
return -EINVAL;
switch (type) {
case PACKETJ_TYPE0:
if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START ||
reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE3:
if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START ||
reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] == CP_PACKETJ_NOP)
continue;
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
default:
dev_err(adev->dev, "Unknown packet type %d !\n", type);
return -EINVAL;
}
}
return 0;
}
......@@ -45,6 +45,9 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
#define JPEG_REG_RANGE_START 0x4000
#define JPEG_REG_RANGE_END 0x41c2
void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
......@@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr);
void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
......
......@@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......@@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
......@@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
......@@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
......@@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block {
};
extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
#endif /* __JPEG_V4_0_H__ */
......@@ -23,9 +23,9 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"
......@@ -1089,7 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
.parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......@@ -1254,56 +1254,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
adev->jpeg.ras = &jpeg_v4_0_3_ras;
}
/**
* jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
uint32_t i, reg, res, cond, type;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res) /* only support 0 at the moment */
return -EINVAL;
switch (type) {
case PACKETJ_TYPE0:
if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE3:
if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] == CP_PACKETJ_NOP)
continue;
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
default:
dev_err(adev->dev, "Unknown packet type %d !\n", type);
return -EINVAL;
}
}
return 0;
}
......@@ -46,9 +46,6 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
#define JPEG_REG_RANGE_START 0x4000
#define JPEG_REG_RANGE_END 0x41c2
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
......@@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
#endif /* __JPEG_V4_0_3_H__ */
......@@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
......@@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "vcn/vcn_5_0_0_offset.h"
......@@ -646,7 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
.parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
......@@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
......@@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
i++;
}
/* Scalar L1 Data Cache per SQC */
......@@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
i++;
}
/* GL1 Data Cache per SA */
......@@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
......@@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
i++;
}
/* L3 Data Cache per GPU */
......@@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
i++;
}
return i;
......
......@@ -10571,7 +10571,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
* TODO: We can likely skip bandwidth validation if the only thing that
* changed about the plane was it'z z-ordering.
*/
if (new_crtc_state->zpos_changed)
if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos)
return true;
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
......@@ -11419,6 +11419,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
drm_dbg(dev, "Failed to determine cursor mode\n");
goto fail;
}
/*
* If overlay cursor is needed, DC cannot go through the
* native cursor update path. All enabled planes on the CRTC
* need to be added for DC to not disable a plane by mistake
*/
if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
goto fail;
}
}
/* Remove exiting planes if they are modified */
......
......@@ -3207,15 +3207,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
pipe_ctx[i]->stream_res.tg->funcs->set_drr(
pipe_ctx[i]->stream_res.tg, &params);
/* dc_state_destruct() might null the stream resources, so fetch tg
* here first to avoid a race condition. The lifetime of the pointee
* itself (the timing_generator object) is not a problem here.
*/
struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
if ((tg != NULL) && tg->funcs) {
if (tg->funcs->set_drr)
tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx[i]->stream_res.tg,
event_triggers, num_frames);
if (tg->funcs->set_static_screen_control)
tg->funcs->set_static_screen_control(
tg, event_triggers, num_frames);
}
}
}
......
......@@ -1462,7 +1462,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
for (i = 0; i < num_pipes; i++) {
if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
/* dc_state_destruct() might null the stream resources, so fetch tg
* here first to avoid a race condition. The lifetime of the pointee
* itself (the timing_generator object) is not a problem here.
*/
struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
if ((tg != NULL) && tg->funcs) {
struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
......@@ -1475,14 +1481,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
num_frames = 2 * (frame_rate % 60);
}
}
if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
pipe_ctx[i]->stream_res.tg->funcs->set_drr(
pipe_ctx[i]->stream_res.tg, &params);
if (tg->funcs->set_drr)
tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx[i]->stream_res.tg,
event_triggers, num_frames);
if (tg->funcs->set_static_screen_control)
tg->funcs->set_static_screen_control(
tg, event_triggers, num_frames);
}
}
}
......
......@@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4
uint16_t supporteddevices;
uint8_t number_of_path;
uint8_t reserved;
struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
};
struct display_object_info_table_v1_5 {
......@@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 {
uint8_t reserved;
// the real number of this included in the structure is calculated by using the
// (whole structure size - the header size- number_of_path)/size of atom_display_object_path
struct atom_display_object_path_v3 display_path[8];
struct atom_display_object_path_v3 display_path[];
};
/*
......
......@@ -439,7 +439,16 @@ enum smu_clk_type {
__SMU_DUMMY_MAP(BACO_CG), \
__SMU_DUMMY_MAP(SOC_CG), \
__SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \
__SMU_DUMMY_MAP(WHISPER_MODE),
__SMU_DUMMY_MAP(WHISPER_MODE), \
__SMU_DUMMY_MAP(EDC_PWRBRK), \
__SMU_DUMMY_MAP(SOC_EDC_XVMIN), \
__SMU_DUMMY_MAP(GFX_PSM_DIDT), \
__SMU_DUMMY_MAP(APT_ALL_ENABLE), \
__SMU_DUMMY_MAP(APT_SQ_THROTTLE), \
__SMU_DUMMY_MAP(APT_PF_DCS), \
__SMU_DUMMY_MAP(GFX_EDC_XVMIN), \
__SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \
__SMU_DUMMY_MAP(FAN_ABNORMAL),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT
......
......@@ -187,6 +187,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(MEM_TEMP_READ),
FEA_MAP(ATHUB_MMHUB_PG),
FEA_MAP(SOC_PCC),
FEA_MAP(EDC_PWRBRK),
FEA_MAP(SOC_EDC_XVMIN),
FEA_MAP(GFX_PSM_DIDT),
FEA_MAP(APT_ALL_ENABLE),
FEA_MAP(APT_SQ_THROTTLE),
FEA_MAP(APT_PF_DCS),
FEA_MAP(GFX_EDC_XVMIN),
FEA_MAP(GFX_DIDT_XVMIN),
FEA_MAP(FAN_ABNORMAL),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
......@@ -674,6 +683,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu)
pcie_table->clk_freq[pcie_table->num_of_link_levels] =
skutable->LclkFreq[link_level];
pcie_table->num_of_link_levels++;
if (link_level == 0)
link_level++;
}
/* dcefclk dpm table setup */
......
......@@ -1464,6 +1464,7 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data,
struct drm_syncobj *syncobj;
struct eventfd_ctx *ev_fd_ctx;
struct syncobj_eventfd_entry *entry;
int ret;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
return -EOPNOTSUPP;
......@@ -1479,13 +1480,15 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
ev_fd_ctx = eventfd_ctx_fdget(args->fd);
if (IS_ERR(ev_fd_ctx))
return PTR_ERR(ev_fd_ctx);
if (IS_ERR(ev_fd_ctx)) {
ret = PTR_ERR(ev_fd_ctx);
goto err_fdget;
}
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
eventfd_ctx_put(ev_fd_ctx);
return -ENOMEM;
ret = -ENOMEM;
goto err_kzalloc;
}
entry->syncobj = syncobj;
entry->ev_fd_ctx = ev_fd_ctx;
......@@ -1496,6 +1499,12 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data,
drm_syncobj_put(syncobj);
return 0;
err_kzalloc:
eventfd_ctx_put(ev_fd_ctx);
err_fdget:
drm_syncobj_put(syncobj);
return ret;
}
int
......
......@@ -2842,9 +2842,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
ce->parallel.guc.wqi_tail = 0;
ce->parallel.guc.wqi_head = 0;
wq_desc_offset = i915_ggtt_offset(ce->state) +
wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
__get_parent_scratch_offset(ce);
wq_base_offset = i915_ggtt_offset(ce->state) +
wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
__get_wq_offset(ce);
info->wq_desc_lo = lower_32_bits(wq_desc_offset);
info->wq_desc_hi = upper_32_bits(wq_desc_offset);
......
......@@ -46,6 +46,8 @@ u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *,
u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
struct nvkm_device *, int, int *);
int gp100_ram_init(struct nvkm_ram *);
/* RAM type-specific MR calculation routines */
int nvkm_sddr2_calc(struct nvkm_ram *);
int nvkm_sddr3_calc(struct nvkm_ram *);
......
......@@ -27,7 +27,7 @@
#include <subdev/bios/init.h>
#include <subdev/bios/rammap.h>
static int
int
gp100_ram_init(struct nvkm_ram *ram)
{
struct nvkm_subdev *subdev = &ram->fb->subdev;
......
......@@ -5,6 +5,7 @@
static const struct nvkm_ram_func
gp102_ram = {
.init = gp100_ram_init,
};
int
......
......@@ -103,13 +103,17 @@ static int rockchip_drm_init_iommu(struct drm_device *drm_dev)
struct rockchip_drm_private *private = drm_dev->dev_private;
struct iommu_domain_geometry *geometry;
u64 start, end;
int ret;
if (IS_ERR_OR_NULL(private->iommu_dev))
return 0;
private->domain = iommu_domain_alloc(private->iommu_dev->bus);
if (!private->domain)
return -ENOMEM;
private->domain = iommu_paging_domain_alloc(private->iommu_dev);
if (IS_ERR(private->domain)) {
ret = PTR_ERR(private->domain);
private->domain = NULL;
return ret;
}
geometry = &private->domain->geometry;
start = geometry->aperture_start;
......
......@@ -2,6 +2,7 @@
config DRM_STM
tristate "DRM Support for STMicroelectronics SoC Series"
depends on DRM && (ARCH_STM32 || COMPILE_TEST)
depends on COMMON_CLK
select DRM_KMS_HELPER
select DRM_GEM_DMA_HELPER
select DRM_PANEL_BRIDGE
......
......@@ -1135,6 +1135,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
static int host1x_drm_probe(struct host1x_device *dev)
{
struct device *dma_dev = dev->dev.parent;
struct tegra_drm *tegra;
struct drm_device *drm;
int err;
......@@ -1149,8 +1150,8 @@ static int host1x_drm_probe(struct host1x_device *dev)
goto put;
}
if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) {
tegra->domain = iommu_domain_alloc(&platform_bus_type);
if (host1x_drm_wants_iommu(dev) && device_iommu_mapped(dma_dev)) {
tegra->domain = iommu_paging_domain_alloc(dma_dev);
if (!tegra->domain) {
err = -ENOMEM;
goto free;
......
......@@ -83,7 +83,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
/* Workarounds not handled yet */
#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; })
#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; })
#define IS_LP(xe) (0)
#define IS_GEN9_LP(xe) (0)
......
......@@ -52,6 +52,7 @@
#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
#define OAG_OACONTROL XE_REG(0xdaf4)
#define OAG_OACONTROL_OA_PES_DISAG_EN REG_GENMASK(27, 22)
#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16)
#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2)
#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0)
......
......@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_bo_types.h"
#include "xe_device_types.h"
......@@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client,
*/
void xe_drm_client_remove_bo(struct xe_bo *bo)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct xe_drm_client *client = bo->client;
xe_assert(xe, !kref_read(&bo->ttm.base.refcount));
spin_lock(&client->bos_lock);
list_del(&bo->client_link);
list_del_init(&bo->client_link);
spin_unlock(&client->bos_lock);
xe_drm_client_put(client);
......@@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo,
u64 sz = bo->size;
u32 mem_type;
xe_bo_assert_held(bo);
if (bo->placement.placement)
mem_type = bo->placement.placement->mem_type;
else
......@@ -196,6 +202,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
struct xe_drm_client *client;
struct drm_gem_object *obj;
struct xe_bo *bo;
LLIST_HEAD(deferred);
unsigned int id;
u32 mem_type;
......@@ -206,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
idr_for_each_entry(&file->object_idr, obj, id) {
struct xe_bo *bo = gem_to_xe_bo(obj);
if (dma_resv_trylock(bo->ttm.base.resv)) {
bo_meminfo(bo, stats);
xe_bo_unlock(bo);
} else {
xe_bo_get(bo);
spin_unlock(&file->table_lock);
xe_bo_lock(bo, false);
bo_meminfo(bo, stats);
xe_bo_unlock(bo);
xe_bo_put(bo);
spin_lock(&file->table_lock);
}
}
spin_unlock(&file->table_lock);
......@@ -215,11 +235,28 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
list_for_each_entry(bo, &client->bos_list, client_link) {
if (!kref_get_unless_zero(&bo->ttm.base.refcount))
continue;
if (dma_resv_trylock(bo->ttm.base.resv)) {
bo_meminfo(bo, stats);
xe_bo_put(bo);
xe_bo_unlock(bo);
} else {
spin_unlock(&client->bos_lock);
xe_bo_lock(bo, false);
bo_meminfo(bo, stats);
xe_bo_unlock(bo);
spin_lock(&client->bos_lock);
/* The bo ref will prevent this bo from being removed from the list */
xe_assert(xef->xe, !list_empty(&bo->client_link));
}
xe_bo_put_deferred(bo, &deferred);
}
spin_unlock(&client->bos_lock);
xe_bo_put_commit(&deferred);
for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
if (!xe_mem_type_to_name[mem_type])
continue;
......
......@@ -9,7 +9,6 @@
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
#include <generated/xe_wa_oob.h>
......
......@@ -182,7 +182,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
action[1] = seqno;
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
G2H_LEN_DW_TLB_INVALIDATE, 1);
if (!ret && fence) {
if (!ret) {
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
/*
* We haven't actually published the TLB fence as per
......@@ -203,7 +203,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
tlb_timeout_jiffies(gt));
}
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
} else if (ret < 0 && fence) {
} else if (ret < 0) {
__invalidation_fence_signal(xe, fence);
}
if (!ret) {
......
......@@ -1375,6 +1375,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
trace_xe_sched_msg_recv(msg);
switch (msg->opcode) {
......@@ -1394,7 +1396,7 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
XE_WARN_ON("Unknown message type");
}
xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data)));
xe_pm_runtime_put(xe);
}
static const struct drm_sched_backend_ops drm_sched_ops = {
......
......@@ -440,6 +440,10 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
}
......
......@@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
struct xe_user_fence *ufence;
u64 __user *ptr = u64_to_user_ptr(addr);
if (!access_ok(ptr, sizeof(ptr)))
if (!access_ok(ptr, sizeof(*ptr)))
return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment