amdgpu_amdkfd.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
24
#include "amd_shared.h"
25

26
#include "amdgpu.h"
27
#include "amdgpu_gfx.h"
28
#include "amdgpu_dma_buf.h"
29
#include <linux/module.h>
30
#include <linux/dma-buf.h>
31
#include "amdgpu_xgmi.h"
32
#include <uapi/linux/kfd_ioctl.h>
33

34 35 36 37 38
/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

39
static bool kfd_initialized;
40

41
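/* One-time module init: record the amount of system memory (used to
 * estimate worst-case page-table reservations), set up the GPUVM memory
 * limits and bring up the KFD core via kgd2kfd_init().
 */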
int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

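/* Per-device probe: create the KFD device handle for this adapter and,
 * on success, add its VRAM to amdgpu_amdkfd_total_mem_size.
 */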
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
				      adev->pdev, adev->asic_type, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                set up amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

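/* Called once amdgpu is fully initialized: describe the compute VMIDs,
 * MEC queues and doorbell range that KFD may use, then hand these shared
 * resources to kgd2kfd_device_init().
 */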
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile-time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc.
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

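/* Suspend/resume and GPU-reset notifications are forwarded to KFD only
 * when a KFD device was probed for this adapter.
 */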
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

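/* Allocate a kernel-mode GTT BO on behalf of KFD (e.g. for MQDs): create,
 * reserve, pin and GART-bind the BO, then kmap it so both a GPU address
 * and a CPU pointer can be returned to the caller.
 */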
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

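/* Tear down a BO allocated with amdgpu_amdkfd_alloc_gtt_mem(): unmap,
 * unpin and drop the reference, in the reverse order of allocation.
 */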
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}

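/* Allocate/free a buffer object in the GWS (global wave sync) domain. The
 * BO only reserves GWS entries; it is never pinned or CPU-mapped here.
 */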
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
				void **mem_obj)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

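/* Report the loaded firmware version for the engine type KFD asks about
 * (CP stages, RLC, SDMA); unknown engine types report 0.
 */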
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
				      enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

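/* Fill in local memory (VRAM) properties for KFD: CPU-visible ("public")
 * vs. remaining ("private") VRAM sizes, the VRAM bus width and the
 * maximum memory clock in MHz.
 */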
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
				      struct kfd_local_mem_info *mem_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	memset(mem_info, 0, sizeof(*mem_info));

	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;

	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* the sclk is in quanta of 10kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

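/* Resolve an exported dma-buf fd back to an amdgpu BO and report its
 * owning device, size, metadata and KFD allocation flags (VRAM vs. GTT,
 * plus PUBLIC when the BO requires CPU access). Only amdgpu-exported
 * buffers placed in VRAM or GTT are accepted.
 */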
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
				  struct kgd_dev **dma_buf_kgd,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dma_buf_kgd)
		*dma_buf_kgd = (struct kgd_dev *)adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

	return amdgpu_vram_mgr_usage(vram_man);
}

uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.xgmi.hive_id;
}

uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->unique_id;
}

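/* Number of XGMI hops between two KFD devices; returns 0 (and logs an
 * error) if the hop count cannot be determined.
 */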
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return  (uint8_t)ret;
}

uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rmmio_remap.bus_addr;
}

uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gds.gws_size;
}

uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rev_id;
}

int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.noretry;
}

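/* Submit a caller-built IB directly on a compute or SDMA ring on behalf
 * of the given VMID and wait for its fence. Primarily intended for the
 * no-HWS path (see the NO_HWS note below).
 */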
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	dma_fence_put(f);
	amdgpu_job_free(job);
err:
	return ret;
}

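/* Enable the COMPUTE power profile while KFD has active work (idle ==
 * false) and release it again when the compute queues go idle.
 */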
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

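/* TLB flush helpers for KFD. On AMDGPU_FAMILY_AI parts the flush must
 * cover every VM hub; other ASICs only flush GFXHUB 0.
 */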
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}

int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	const uint32_t flush_type = 0;
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}

bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->have_atomics_support;
}