Commit 36c9c3c9 authored by Dave Airlie

Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next

This is a new pull for drm-next on top of last week's, with the following
changes:
- Fixed 64 bit divide
- Fixed vram type on vega20
- Misc vega20 fixes
- Misc DC fixes
- Fix GDS/GWS/OA domain handling

Previous changes from last week:
amdgpu/kfd:
- Picasso (new APU) support
- Raven2 (new APU) support
- Vega20 enablement
- ACP powergating improvements
- Add ABGR/XBGR display support
- VCN JPEG engine support
- Initial xGMI support
- Use load balancing for engine scheduling
- Lots of new documentation
- Rework and clean up i2c and aux handling in DC
- Add DP YCbCr 4:2:0 support in DC
- Add DMCU firmware loading for Raven (used for ABM and PSR)
- New debugfs features in DC
- LVDS support in DC
- Implement wave kill for gfx/compute (lightweight reset for shaders)
- Use AGP aperture to avoid gart mappings when possible
- GPUVM performance improvements
- Bulk moves for more efficient GPUVM LRU handling
- Merge amdgpu and amdkfd into one module
- Enable gfxoff and stutter mode on Raven
- Misc cleanups

Scheduler:
- Load balancing support
- Bug fixes

ttm:
- Bulk move functionality
- Bug fixes

radeon:
- Misc cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
parents 0320ac51 846311ae
...@@ -505,7 +505,7 @@ GPU Scheduler ...@@ -505,7 +505,7 @@ GPU Scheduler
Overview Overview
-------- --------
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview :doc: Overview
Scheduler Function References Scheduler Function References
...@@ -514,5 +514,5 @@ Scheduler Function References ...@@ -514,5 +514,5 @@ Scheduler Function References
.. kernel-doc:: include/drm/gpu_scheduler.h .. kernel-doc:: include/drm/gpu_scheduler.h
:internal: :internal:
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:export: :export:
...@@ -285,8 +285,6 @@ source "drivers/gpu/drm/bridge/Kconfig" ...@@ -285,8 +285,6 @@ source "drivers/gpu/drm/bridge/Kconfig"
source "drivers/gpu/drm/sti/Kconfig" source "drivers/gpu/drm/sti/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig" source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/v3d/Kconfig" source "drivers/gpu/drm/v3d/Kconfig"
......
...@@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS ...@@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS
source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
...@@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ ...@@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH) \ -I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \ -I$(FULL_AMD_DISPLAY_PATH)/include \
-I$(FULL_AMD_DISPLAY_PATH)/dc \ -I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
amdgpu-y := amdgpu_drv.o amdgpu-y := amdgpu_drv.o
...@@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ ...@@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_ids.o amdgpu_gmc.o amdgpu_xgmi.o
# add asic specific block # add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
...@@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce ...@@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \ amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o vega20_reg_init.o nbio_v7_4.o
# add DF block # add DF block
amdgpu-y += \ amdgpu-y += \
...@@ -73,7 +74,7 @@ amdgpu-y += \ ...@@ -73,7 +74,7 @@ amdgpu-y += \
amdgpu-y += \ amdgpu-y += \
gmc_v7_0.o \ gmc_v7_0.o \
gmc_v8_0.o \ gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
# add IH block # add IH block
amdgpu-y += \ amdgpu-y += \
...@@ -88,7 +89,8 @@ amdgpu-y += \ ...@@ -88,7 +89,8 @@ amdgpu-y += \
amdgpu-y += \ amdgpu-y += \
amdgpu_psp.o \ amdgpu_psp.o \
psp_v3_1.o \ psp_v3_1.o \
psp_v10_0.o psp_v10_0.o \
psp_v11_0.o
# add SMC block # add SMC block
amdgpu-y += \ amdgpu-y += \
...@@ -108,6 +110,7 @@ amdgpu-y += \ ...@@ -108,6 +110,7 @@ amdgpu-y += \
# add async DMA block # add async DMA block
amdgpu-y += \ amdgpu-y += \
amdgpu_sdma.o \
sdma_v2_4.o \ sdma_v2_4.o \
sdma_v3_0.o \ sdma_v3_0.o \
sdma_v4_0.o sdma_v4_0.o
...@@ -134,6 +137,9 @@ amdgpu-y += \ ...@@ -134,6 +137,9 @@ amdgpu-y += \
amdgpu-y += amdgpu_amdkfd.o amdgpu-y += amdgpu_amdkfd.o
ifneq ($(CONFIG_HSA_AMD),) ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \ amdgpu-y += \
amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gpuvm.o \
......
This diff is collapsed.
...@@ -116,136 +116,47 @@ static int acp_sw_fini(void *handle) ...@@ -116,136 +116,47 @@ static int acp_sw_fini(void *handle)
return 0; return 0;
} }
/* power off a tile/block within ACP */
static int acp_suspend_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile : %d to suspend\n", tile);
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val &= ACP_TILE_ON_MASK;
if (val == 0x0) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val = val | (1 << tile);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x500 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == ACP_TILE_OFF_MASK)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val |= ACP_TILE_OFF_RETAIN_REG_MASK;
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
/* power on a tile/block within ACP */
static int acp_resume_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile to resume\n");
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val = val & ACP_TILE_ON_MASK;
if (val != 0x0) {
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x600 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == 0x0)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
if (tile == ACP_TILE_P1)
val = val & (ACP_TILE_P1_MASK);
else if (tile == ACP_TILE_P2)
val = val & (ACP_TILE_P2_MASK);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
struct acp_pm_domain { struct acp_pm_domain {
void *cgs_dev; void *adev;
struct generic_pm_domain gpd; struct generic_pm_domain gpd;
}; };
static int acp_poweroff(struct generic_pm_domain *genpd) static int acp_poweroff(struct generic_pm_domain *genpd)
{ {
int i, ret;
struct acp_pm_domain *apd; struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd); apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) { if (apd != NULL) {
/* Donot return abruptly if any of power tile fails to suspend. adev = apd->adev;
* Log it and continue powering off other tile /* call smu to POWER GATE ACP block
*/ * smu will
for (i = 4; i >= 0 ; i--) { * 1. turn off the acp clock
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); * 2. power off the acp tiles
if (ret) * 3. check and enter ulv state
pr_err("ACP tile %d tile suspend failed\n", i); */
} if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
} }
return 0; return 0;
} }
static int acp_poweron(struct generic_pm_domain *genpd) static int acp_poweron(struct generic_pm_domain *genpd)
{ {
int i, ret;
struct acp_pm_domain *apd; struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd); apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) { if (apd != NULL) {
for (i = 0; i < 2; i++) { adev = apd->adev;
ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i); /* call smu to UNGATE ACP block
if (ret) { * smu will
pr_err("ACP tile %d resume failed\n", i); * 1. exit ulv
break; * 2. turn on acp clock
} * 3. power on acp tiles
} */
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
/* Disable DSPs which are not going to be used */ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
for (i = 0; i < 3; i++) {
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
/* Continue suspending other DSP, even if one fails */
if (ret)
pr_err("ACP DSP %d suspend failed\n", i);
}
} }
return 0; return 0;
} }
...@@ -289,30 +200,31 @@ static int acp_hw_init(void *handle) ...@@ -289,30 +200,31 @@ static int acp_hw_init(void *handle)
r = amd_acp_hw_init(adev->acp.cgs_device, r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor); ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */ /* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) if (r == -ENODEV) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0; return 0;
else if (r) } else if (r) {
return r; return r;
}
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL; return -EINVAL;
acp_base = adev->rmmio_base; acp_base = adev->rmmio_base;
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
adev->acp.acp_genpd->gpd.power_off = acp_poweroff; if (adev->acp.acp_genpd == NULL)
adev->acp.acp_genpd->gpd.power_on = acp_poweron; return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
}
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL); GFP_KERNEL);
...@@ -429,17 +341,16 @@ static int acp_hw_init(void *handle) ...@@ -429,17 +341,16 @@ static int acp_hw_init(void *handle)
if (r) if (r)
return r; return r;
if (adev->asic_type != CHIP_STONEY) { for (i = 0; i < ACP_DEVS ; i++) {
for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) {
if (r) { dev_err(dev, "Failed to add dev to genpd\n");
dev_err(dev, "Failed to add dev to genpd\n"); return r;
return r;
}
} }
} }
/* Assert Soft reset of ACP */ /* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
...@@ -497,8 +408,10 @@ static int acp_hw_fini(void *handle) ...@@ -497,8 +408,10 @@ static int acp_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* return early if no ACP */ /* return early if no ACP */
if (!adev->acp.acp_cell) if (!adev->acp.acp_genpd) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0; return 0;
}
/* Assert Soft reset of ACP */ /* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
...@@ -536,19 +449,17 @@ static int acp_hw_fini(void *handle) ...@@ -536,19 +449,17 @@ static int acp_hw_fini(void *handle)
udelay(100); udelay(100);
} }
if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) {
for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); ret = pm_genpd_remove_device(dev);
ret = pm_genpd_remove_device(dev); /* If removal fails, dont giveup and try rest */
/* If removal fails, dont giveup and try rest */ if (ret)
if (ret) dev_err(dev, "remove dev from genpd failed\n");
dev_err(dev, "remove dev from genpd failed\n");
}
kfree(adev->acp.acp_genpd);
} }
mfd_remove_devices(adev->acp.parent); mfd_remove_devices(adev->acp.parent);
kfree(adev->acp.acp_res); kfree(adev->acp.acp_res);
kfree(adev->acp.acp_genpd);
kfree(adev->acp.acp_cell); kfree(adev->acp.acp_cell);
return 0; return 0;
...@@ -556,11 +467,21 @@ static int acp_hw_fini(void *handle) ...@@ -556,11 +467,21 @@ static int acp_hw_fini(void *handle)
static int acp_suspend(void *handle) static int acp_suspend(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power up on suspend */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0; return 0;
} }
static int acp_resume(void *handle) static int acp_resume(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power down again on resume */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0; return 0;
} }
...@@ -593,6 +514,12 @@ static int acp_set_clockgating_state(void *handle, ...@@ -593,6 +514,12 @@ static int acp_set_clockgating_state(void *handle,
static int acp_set_powergating_state(void *handle, static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state) enum amd_powergating_state state)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = state == AMD_PG_STATE_GATE ? true : false;
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
return 0; return 0;
} }
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <drm/drm_crtc_helper.h> #include <drm/drm_crtc_helper.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_pm.h" #include "amdgpu_pm.h"
#include "amdgpu_display.h"
#include "amd_acpi.h" #include "amd_acpi.h"
#include "atom.h" #include "atom.h"
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <linux/module.h> #include <linux/module.h>
const struct kgd2kfd_calls *kgd2kfd; const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00; static const unsigned int compute_vmid_bitmap = 0xFF00;
...@@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void) ...@@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void)
{ {
int ret; int ret;
#if defined(CONFIG_HSA_AMD_MODULE) #ifdef CONFIG_HSA_AMD
int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
return -ENOENT;
ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret) {
symbol_put(kgd2kfd_init);
kgd2kfd = NULL;
}
#elif defined(CONFIG_HSA_AMD)
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret) if (ret)
kgd2kfd = NULL; kgd2kfd = NULL;
amdgpu_amdkfd_gpuvm_init_mem_limits();
#else #else
kgd2kfd = NULL; kgd2kfd = NULL;
ret = -ENOENT; ret = -ENOENT;
#endif #endif
#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
#endif
return ret; return ret;
} }
void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_fini(void)
{ {
if (kgd2kfd) { if (kgd2kfd)
kgd2kfd->exit(); kgd2kfd->exit();
symbol_put(kgd2kfd_init);
}
} }
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
...@@ -155,7 +132,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) ...@@ -155,7 +132,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn .gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT, << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START), AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index .drm_render_minor = adev->ddev->render->index
}; };
...@@ -267,7 +244,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) ...@@ -267,7 +244,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
amdgpu_device_gpu_recover(adev, NULL, false); if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
} }
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
...@@ -433,6 +411,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) ...@@ -433,6 +411,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
} }
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->gmc.xgmi.hive_id;
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr, uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len) uint32_t *ib_cmd, uint32_t ib_len)
...@@ -506,7 +491,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) ...@@ -506,7 +491,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false; return false;
} }
#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) #ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{ {
return false; return false;
......
...@@ -145,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); ...@@ -145,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
#define read_user_wptr(mmptr, wptr, dst) \ #define read_user_wptr(mmptr, wptr, dst) \
({ \ ({ \
...@@ -162,16 +163,17 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); ...@@ -162,16 +163,17 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
}) })
/* GPUVM API */ /* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **process_info, void **vm, void **process_info,
struct dma_fence **ef); struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, struct file *filp, unsigned int pasid,
void **vm, void **process_info, void **vm, void **process_info,
struct dma_fence **ef); struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm); struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size, struct kgd_dev *kgd, uint64_t va, uint64_t size,
......
...@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
......
...@@ -164,6 +164,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -164,6 +164,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
......
...@@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
...@@ -214,7 +215,8 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -214,7 +215,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset, .gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle .set_compute_idle = amdgpu_amdkfd_set_compute_idle,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
......
...@@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) ...@@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
struct amdgpu_bo *pd = vm->root.base.bo; struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param; struct amdgpu_vm_parser param;
uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret; int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM; param.domain = AMDGPU_GEM_DOMAIN_VRAM;
...@@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) ...@@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret; return ret;
} }
addr = amdgpu_bo_gpu_offset(vm->root.base.bo); vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
vm->pd_phys_addr = addr;
if (vm->use_cpu_for_update) { if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL); ret = amdgpu_bo_kmap(pd, NULL);
...@@ -678,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ...@@ -678,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
if (!ctx->vm_pd) if (!ctx->vm_pd)
return -ENOMEM; return -ENOMEM;
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0; ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true; ctx->kfd_bo.tv.shared = true;
...@@ -743,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ...@@ -743,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
return -ENOMEM; return -ENOMEM;
} }
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0; ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true; ctx->kfd_bo.tv.shared = true;
...@@ -1003,8 +998,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ...@@ -1003,8 +998,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return ret; return ret;
} }
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **process_info, void **vm, void **process_info,
struct dma_fence **ef) struct dma_fence **ef)
{ {
struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_device *adev = get_amdgpu_device(kgd);
...@@ -1016,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, ...@@ -1016,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
return -ENOMEM; return -ENOMEM;
/* Initialize AMDGPU part of the VM */ /* Initialize AMDGPU part of the VM */
ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
if (ret) { if (ret) {
pr_err("Failed init vm ret %d\n", ret); pr_err("Failed init vm ret %d\n", ret);
goto amdgpu_vm_init_fail; goto amdgpu_vm_init_fail;
...@@ -1039,7 +1034,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, ...@@ -1039,7 +1034,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
} }
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, struct file *filp, unsigned int pasid,
void **vm, void **process_info, void **vm, void **process_info,
struct dma_fence **ef) struct dma_fence **ef)
{ {
...@@ -1054,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, ...@@ -1054,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL; return -EINVAL;
/* Convert VM into a compute VM */ /* Convert VM into a compute VM */
ret = amdgpu_vm_make_compute(adev, avm); ret = amdgpu_vm_make_compute(adev, avm, pasid);
if (ret) if (ret)
return ret; return ret;
...@@ -1117,6 +1112,25 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) ...@@ -1117,6 +1112,25 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
kfree(vm); kfree(vm);
} }
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
if (WARN_ON(!kgd || !vm))
return;
pr_debug("Releasing process vm %p\n", vm);
/* The original pasid of amdgpu vm has already been
* released during making a amdgpu vm to a compute vm
* The current pasid is managed by kfd and will be
* released on kfd process destroy. Set amdgpu pasid
* to 0 to avoid duplicate release.
*/
amdgpu_vm_release_compute(adev, avm);
}
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{ {
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "amdgpu_atombios.h" #include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h" #include "amdgpu_atomfirmware.h"
#include "amdgpu_i2c.h" #include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include "atom-bits.h" #include "atom-bits.h"
......
...@@ -117,6 +117,10 @@ union igp_info { ...@@ -117,6 +117,10 @@ union igp_info {
union umc_info { union umc_info {
struct atom_umc_info_v3_1 v31; struct atom_umc_info_v3_1 v31;
}; };
union vram_info {
struct atom_vram_info_header_v2_3 v23;
};
/* /*
* Return vram width from integrated system info table, if available, * Return vram width from integrated system info table, if available,
* or 0 if not. * or 0 if not.
...@@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, ...@@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR5: case ATOM_DGPU_VRAM_TYPE_GDDR5:
vram_type = AMDGPU_VRAM_TYPE_GDDR5; vram_type = AMDGPU_VRAM_TYPE_GDDR5;
break; break;
case ATOM_DGPU_VRAM_TYPE_HBM: case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM; vram_type = AMDGPU_VRAM_TYPE_HBM;
break; break;
default: default:
...@@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) ...@@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
int index; int index;
u16 data_offset, size; u16 data_offset, size;
union igp_info *igp_info; union igp_info *igp_info;
union umc_info *umc_info; union vram_info *vram_info;
u8 frev, crev; u8 frev, crev;
u8 mem_type; u8 mem_type;
...@@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) ...@@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
integratedsysteminfo); integratedsysteminfo);
else else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info); vram_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context, if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size, index, &size,
&frev, &crev, &data_offset)) { &frev, &crev, &data_offset)) {
...@@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) ...@@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
return 0; return 0;
} }
} else { } else {
umc_info = (union umc_info *) vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset); (mode_info->atom_context->bios + data_offset);
switch (crev) { switch (crev) {
case 1: case 3:
mem_type = umc_info->v31.vram_type; mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type); return convert_atom_mem_type_to_vram_type(adev, mem_type);
default: default:
return 0; return 0;
......
...@@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref) ...@@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref)
refcount); refcount);
struct amdgpu_bo_list_entry *e; struct amdgpu_bo_list_entry *e;
amdgpu_bo_list_for_each_entry(e, list) amdgpu_bo_list_for_each_entry(e, list) {
amdgpu_bo_unref(&e->robj); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
amdgpu_bo_unref(&bo);
}
call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
} }
...@@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, ...@@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
unsigned i; unsigned i;
int r; int r;
if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry)) if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
/ sizeof(struct amdgpu_bo_list_entry))
return -EINVAL; return -EINVAL;
size = sizeof(struct amdgpu_bo_list); size = sizeof(struct amdgpu_bo_list);
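The tightened bound above exists because the list header and the entry array come from a single allocation, so the check has to keep sizeof(struct amdgpu_bo_list) + num_entries * sizeof(struct amdgpu_bo_list_entry) from overflowing size_t, not just the array part. A minimal standalone sketch of the same overflow-safe pattern (the struct and function names below are illustrative, not amdgpu's):

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct list_header { size_t num_entries; };
struct list_entry  { void *obj; uint32_t priority; };

/* Allocate a header followed by num entries; refuse counts that would
 * make the total size wrap around size_t instead of silently overflowing. */
static void *alloc_header_plus_entries(size_t num)
{
	if (num > (SIZE_MAX - sizeof(struct list_header)) /
		  sizeof(struct list_entry))
		return NULL;

	return malloc(sizeof(struct list_header) +
		      num * sizeof(struct list_entry));
}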
...@@ -111,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, ...@@ -111,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry = &array[last_entry++]; entry = &array[last_entry++];
} }
entry->robj = bo;
entry->priority = min(info[i].bo_priority, entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY); AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &entry->robj->tbo; entry->tv.bo = &bo->tbo;
entry->tv.shared = !entry->robj->prime_shared_count; entry->tv.shared = !bo->prime_shared_count;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = entry->robj; list->gds_obj = bo;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
list->gws_obj = entry->robj; list->gws_obj = bo;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
list->oa_obj = entry->robj; list->oa_obj = bo;
total_size += amdgpu_bo_size(entry->robj); total_size += amdgpu_bo_size(bo);
trace_amdgpu_bo_list_set(list, entry->robj); trace_amdgpu_bo_list_set(list, bo);
} }
list->first_userptr = first_userptr; list->first_userptr = first_userptr;
...@@ -137,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, ...@@ -137,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0; return 0;
error_free: error_free:
while (i--) while (i--) {
amdgpu_bo_unref(&array[i].robj); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
}
kvfree(list); kvfree(list);
return r; return r;
...@@ -190,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, ...@@ -190,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* with the same priority, i.e. it must be stable. * with the same priority, i.e. it must be stable.
*/ */
amdgpu_bo_list_for_each_entry(e, list) { amdgpu_bo_list_for_each_entry(e, list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
unsigned priority = e->priority; unsigned priority = e->priority;
if (!e->robj->parent) if (!bo->parent)
list_add_tail(&e->tv.head, &bucket[priority]); list_add_tail(&e->tv.head, &bucket[priority]);
e->user_pages = NULL; e->user_pages = NULL;
......
...@@ -32,7 +32,6 @@ struct amdgpu_bo_va; ...@@ -32,7 +32,6 @@ struct amdgpu_bo_va;
struct amdgpu_fpriv; struct amdgpu_fpriv;
struct amdgpu_bo_list_entry { struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv; struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va; struct amdgpu_bo_va *bo_va;
uint32_t priority; uint32_t priority;
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "atombios_dp.h" #include "atombios_dp.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_i2c.h" #include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
struct amdgpu_ctx_entity {
uint64_t sequence;
struct dma_fence **fences;
struct drm_sched_entity entity;
};
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
#endif
This diff is collapsed.
...@@ -23,6 +23,21 @@ ...@@ -23,6 +23,21 @@
#ifndef __AMDGPU_DISPLAY_H__ #ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer * struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev, amdgpu_display_user_framebuffer_create(struct drm_device *dev,
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_irq.h" #include "amdgpu_irq.h"
#include "amdgpu_gem.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
...@@ -113,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff; ...@@ -113,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32; uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL; char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL; char *amdgpu_virtual_display = NULL;
/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ /* OverDrive(bit 14) disabled by default*/
uint amdgpu_pp_feature_mask = 0xfffd3fff; uint amdgpu_pp_feature_mask = 0xffffbfff;
int amdgpu_ngg = 0; int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0; int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0;
...@@ -531,6 +532,102 @@ MODULE_PARM_DESC(smu_memory_pool_size, ...@@ -531,6 +532,102 @@ MODULE_PARM_DESC(smu_memory_pool_size,
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
* Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
* Setting 1 disables over-subscription. Setting 2 disables HWS and statically
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
/**
* DOC: hws_max_conc_proc (int)
* Maximum number of processes that HWS can schedule concurrently. The maximum is the
* number of VMIDs assigned to the HWS, which is also the default.
*/
int hws_max_conc_proc = 8;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
/**
* DOC: cwsr_enable (int)
* CWSR(compute wave store and resume) allows the GPU to preempt shader execution in
* the middle of a compute wave. Default is 1 to enable this feature. Setting 0
* disables it.
*/
int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
/**
* DOC: max_num_of_queues_per_device (int)
* Maximum number of queues per device. Valid setting is between 1 and 4096. Default
* is 4096.
*/
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
/**
* DOC: send_sigterm (int)
* Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
* but just print errors on dmesg. Setting 1 enables sending sigterm.
*/
int send_sigterm;
module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
* DOC: debug_largebar (int)
* Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
* system. This limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* Default value is 0, disabled.
*/
int debug_largebar;
module_param(debug_largebar, int, 0444);
MODULE_PARM_DESC(debug_largebar,
"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
/**
* DOC: ignore_crat (int)
* Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
* table to get information about AMD APUs. This option can serve as a workaround on
* systems with a broken CRAT table.
*/
int ignore_crat;
module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
/**
* DOC: noretry (int)
* This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
* Setting 1 disables retry.
* Retry is needed for recoverable page faults.
*/
int noretry;
module_param(noretry, int, 0644);
MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
#endif
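The DOC blocks above describe ordinary kernel module parameters; with amdkfd now linked into amdgpu, such options are passed to the amdgpu module (for example amdgpu.noretry=1 on the kernel command line, or modprobe amdgpu noretry=1, and they appear under /sys/module/amdgpu/parameters/). A minimal sketch of the same module_param idiom, using an illustrative parameter name rather than one of the driver's:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

/* Illustrative flag, not an actual amdgpu/amdkfd parameter. */
static int example_flag;
module_param(example_flag, int, 0644);	/* 0644: readable and writable via sysfs */
MODULE_PARM_DESC(example_flag, "Example flag (0 = off (default), 1 = on)");

static int __init example_init(void)
{
	pr_info("example_flag=%d\n", example_flag);
	return 0;
}

static void __exit example_exit(void)
{
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");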
static const struct pci_device_id pciidlist[] = { static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI #ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
...@@ -769,14 +866,15 @@ static const struct pci_device_id pciidlist[] = { ...@@ -769,14 +866,15 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Vega 20 */ /* Vega 20 */
{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
/* Raven */ /* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0, 0, 0} {0, 0, 0}
}; };
...@@ -803,14 +901,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ...@@ -803,14 +901,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV; return -ENODEV;
} }
/*
* Initialize amdkfd before starting radeon. If it was not loaded yet,
* defer radeon probing
*/
ret = amdgpu_amdkfd_init();
if (ret == -EPROBE_DEFER)
return ret;
/* Get rid of things like offb */ /* Get rid of things like offb */
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret) if (ret)
...@@ -855,8 +945,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) ...@@ -855,8 +945,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{ {
struct drm_device *dev = pci_get_drvdata(pdev); struct drm_device *dev = pci_get_drvdata(pdev);
drm_dev_unregister(dev); DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
drm_dev_put(dev); drm_dev_unplug(dev);
pci_disable_device(pdev); pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
} }
...@@ -1151,6 +1241,10 @@ static int __init amdgpu_init(void) ...@@ -1151,6 +1241,10 @@ static int __init amdgpu_init(void)
pdriver = &amdgpu_kms_pci_driver; pdriver = &amdgpu_kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl; driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler(); amdgpu_register_atpx_handler();
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
/* let modprobe override vga console setting */ /* let modprobe override vga console setting */
return pci_register_driver(pdriver); return pci_register_driver(pdriver);
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include "atombios_encoders.h" #include "atombios_encoders.h"
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "cikd.h" #include "cikd.h"
#include "amdgpu_gem.h"
#include <drm/drm_fb_helper.h> #include <drm/drm_fb_helper.h>
......
...@@ -195,19 +195,6 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) ...@@ -195,19 +195,6 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
return 0; return 0;
} }
/**
* amdgpu_fence_schedule_fallback - schedule fallback check
*
* @ring: pointer to struct amdgpu_ring
*
* Start a timer as fallback to our interrupts.
*/
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
mod_timer(&ring->fence_drv.fallback_timer,
jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
/** /**
* amdgpu_fence_process - check for fence activity * amdgpu_fence_process - check for fence activity
* *
...@@ -229,9 +216,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) ...@@ -229,9 +216,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
if (unlikely(seq == last_seq)) if (unlikely(seq == last_seq))
return; return;
...@@ -262,21 +246,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) ...@@ -262,21 +246,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (last_seq != seq); } while (last_seq != seq);
} }
/**
* amdgpu_fence_fallback - fallback for hardware interrupts
*
* @work: delayed work item
*
* Checks for fence activity.
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
struct amdgpu_ring *ring = from_timer(ring, t,
fence_drv.fallback_timer);
amdgpu_fence_process(ring);
}
/** /**
* amdgpu_fence_wait_empty - wait for all fences to signal * amdgpu_fence_wait_empty - wait for all fences to signal
* *
...@@ -424,8 +393,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, ...@@ -424,8 +393,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
atomic_set(&ring->fence_drv.last_seq, 0); atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false; ring->fence_drv.initialized = false;
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock); spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
...@@ -501,7 +468,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) ...@@ -501,7 +468,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src, amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type); ring->fence_drv.irq_type);
drm_sched_fini(&ring->sched); drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]); dma_fence_put(ring->fence_drv.fences[j]);
kfree(ring->fence_drv.fences); kfree(ring->fence_drv.fences);
...@@ -594,27 +560,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) ...@@ -594,27 +560,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
return (const char *)fence->ring->name; return (const char *)fence->ring->name;
} }
/**
* amdgpu_fence_enable_signaling - enable signalling on fence
* @fence: fence
*
* This function is called with fence_queue lock held, and adds a callback
* to fence_queue that checks if this fence is signaled, and if so it
* signals the fence and removes itself.
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;
if (!timer_pending(&ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(ring);
DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
return true;
}
/** /**
* amdgpu_fence_free - free up the fence memory * amdgpu_fence_free - free up the fence memory
* *
...@@ -645,7 +590,6 @@ static void amdgpu_fence_release(struct dma_fence *f) ...@@ -645,7 +590,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
static const struct dma_fence_ops amdgpu_fence_ops = { static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name, .get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name, .get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.release = amdgpu_fence_release, .release = amdgpu_fence_release,
}; };
...@@ -701,7 +645,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) ...@@ -701,7 +645,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
seq_printf(m, "gpu recover\n"); seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL, true); amdgpu_device_gpu_recover(adev, NULL);
return 0; return 0;
} }
......
...@@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) ...@@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
...@@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) ...@@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel; bp.type = ttm_bo_type_kernel;
bp.resv = NULL; bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
if (r) { if (r) {
return r; return r;
} }
...@@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) ...@@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{ {
int r; int r;
r = amdgpu_bo_reserve(adev->gart.robj, false); r = amdgpu_bo_reserve(adev->gart.bo, false);
if (unlikely(r != 0)) if (unlikely(r != 0))
return r; return r;
r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM); r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) { if (r) {
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
return r; return r;
} }
r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr); r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
if (r) if (r)
amdgpu_bo_unpin(adev->gart.robj); amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
return r; return r;
} }
...@@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) ...@@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
return; return;
} }
r = amdgpu_bo_reserve(adev->gart.robj, true); r = amdgpu_bo_reserve(adev->gart.bo, true);
if (likely(r == 0)) { if (likely(r == 0)) {
amdgpu_bo_kunmap(adev->gart.robj); amdgpu_bo_kunmap(adev->gart.bo);
amdgpu_bo_unpin(adev->gart.robj); amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.ptr = NULL; adev->gart.ptr = NULL;
} }
} }
...@@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) ...@@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/ */
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{ {
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
return; return;
} }
amdgpu_bo_unref(&adev->gart.robj); amdgpu_bo_unref(&adev->gart.bo);
} }
/* /*
......
...@@ -40,8 +40,7 @@ struct amdgpu_bo; ...@@ -40,8 +40,7 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) #define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
struct amdgpu_gart { struct amdgpu_gart {
u64 table_addr; struct amdgpu_bo *bo;
struct amdgpu_bo *robj;
void *ptr; void *ptr;
unsigned num_gpu_pages; unsigned num_gpu_pages;
unsigned num_cpu_pages; unsigned num_cpu_pages;
......
...@@ -24,13 +24,6 @@ ...@@ -24,13 +24,6 @@
#ifndef __AMDGPU_GDS_H__ #ifndef __AMDGPU_GDS_H__
#define __AMDGPU_GDS_H__ #define __AMDGPU_GDS_H__
/* Because TTM requires that an allocated buffer be PAGE_SIZE aligned,
 * we should report the GDS/GWS/OA size as PAGE_SIZE aligned
 */
#define AMDGPU_GDS_SHIFT 2
#define AMDGPU_GWS_SHIFT PAGE_SHIFT
#define AMDGPU_OA_SHIFT PAGE_SHIFT
struct amdgpu_ring; struct amdgpu_ring;
struct amdgpu_bo; struct amdgpu_bo;
......
...@@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, ...@@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) /* GDS allocations must be DW aligned */
size = size << AMDGPU_GDS_SHIFT; if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) size = ALIGN(size, 4);
size = size << AMDGPU_GWS_SHIFT;
else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
size = size << AMDGPU_OA_SHIFT;
else
return -EINVAL;
} }
size = roundup(size, PAGE_SIZE);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
r = amdgpu_bo_reserve(vm->root.base.bo, false); r = amdgpu_bo_reserve(vm->root.base.bo, false);
...@@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
if (args->va_address >= AMDGPU_VA_HOLE_START && if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_VA_HOLE_END) { args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(&dev->pdev->dev, dev_dbg(&dev->pdev->dev,
"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n", "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
args->va_address, AMDGPU_VA_HOLE_START, args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_VA_HOLE_END); AMDGPU_GMC_HOLE_END);
return -EINVAL; return -EINVAL;
} }
args->va_address &= AMDGPU_VA_HOLE_MASK; args->va_address &= AMDGPU_GMC_HOLE_MASK;
if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
......
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_GEM_H__
#define __AMDGPU_GEM_H__
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
/*
* GEM.
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file_priv);
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
/*
* GEM objects.
*/
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
#endif
...@@ -26,9 +26,44 @@ ...@@ -26,9 +26,44 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_gfx.h" #include "amdgpu_gfx.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
/* /*
 * GPU scratch register helper functions. * GPU GFX IP block helper functions.
*/ */
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
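The helpers above linearize a (mec, pipe, queue) triple into a single bit index for the MEC queue bitmap and back again. The standalone sketch below only illustrates that round trip; the 4 pipes per MEC and 8 queues per pipe are illustrative assumptions, not values taken from this patch (the real values live in adev->gfx.mec).
#include <stdio.h>
/* Illustrative topology; the driver reads these from adev->gfx.mec. */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8
static int queue_to_bit(int mec, int pipe, int queue)
{
	return mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE +
	       pipe * NUM_QUEUE_PER_PIPE + queue;
}
static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}
int main(void)
{
	int mec, pipe, queue;
	int bit = queue_to_bit(1, 2, 5);	/* MEC 1, pipe 2, queue 5 -> bit 53 */
	bit_to_queue(bit, &mec, &pipe, &queue);
	printf("bit %d -> mec %d pipe %d queue %d\n", bit, mec, pipe, queue);
	return 0;
}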
/** /**
* amdgpu_gfx_scratch_get - Allocate a scratch register * amdgpu_gfx_scratch_get - Allocate a scratch register
* *
...@@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) ...@@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
&ring->mqd_gpu_addr, &ring->mqd_gpu_addr,
&ring->mqd_ptr); &ring->mqd_ptr);
} }
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. The gfx off feature will be enabled by the gfx ip after gfx cg/pg is enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not request enabling the gfx off feature before having requested to disable it.
*/
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
return;
if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
return;
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!enable)
adev->gfx.gfx_off_req_count++;
else if (adev->gfx.gfx_off_req_count > 0)
adev->gfx.gfx_off_req_count--;
if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
} else if (!enable && adev->gfx.gfx_off_state) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
adev->gfx.gfx_off_state = false;
}
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
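The request counting in amdgpu_gfx_off_ctrl() is easiest to see in isolation. Below is a minimal userspace model of just that bookkeeping: each "disable" request bumps a counter and forces the GFX block back on, and gfxoff is only re-armed once every request has been dropped. The struct, function, and scenario are hypothetical; locking, the 100 ms delayed work, and the actual SMU powergating call are deliberately omitted.
#include <stdbool.h>
#include <stdio.h>
struct gfx_off_state {
	int req_count;	/* outstanding "keep GFX powered" requests */
	bool gfx_off;	/* whether gfxoff is currently armed */
};
static void gfx_off_ctrl(struct gfx_off_state *s, bool enable)
{
	if (!enable)
		s->req_count++;
	else if (s->req_count > 0)
		s->req_count--;
	if (enable && !s->gfx_off && s->req_count == 0)
		s->gfx_off = true;	/* the kernel arms this via delayed work */
	else if (!enable && s->gfx_off)
		s->gfx_off = false;	/* the kernel powers the block back up via the SMU */
}
int main(void)
{
	struct gfx_off_state s = { .req_count = 0, .gfx_off = true };
	gfx_off_ctrl(&s, false);	/* client A needs the GFX block powered */
	gfx_off_ctrl(&s, false);	/* client B too */
	gfx_off_ctrl(&s, true);		/* A is done; B still outstanding */
	printf("req_count=%d gfx_off=%d\n", s.req_count, s.gfx_off);	/* 1, 0 */
	gfx_off_ctrl(&s, true);		/* B is done; gfxoff can be re-armed */
	printf("req_count=%d gfx_off=%d\n", s.req_count, s.gfx_off);	/* 0, 1 */
	return 0;
}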
/*
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
#include "amdgpu.h"
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
*
* @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting address
* @flags: resulting flags
*
* Get the address and flags to be used for a PDE (Page Directory Entry).
*/
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_dma_tt *ttm;
switch (bo->tbo.mem.mem_type) {
case TTM_PL_TT:
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
*addr = ttm->dma_address[0];
break;
case TTM_PL_VRAM:
*addr = amdgpu_bo_gpu_offset(bo);
break;
default:
*addr = 0;
break;
}
*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}
/**
* amdgpu_gmc_pd_addr - return the address of the root directory
 *
 * @bo: the page directory BO to return the address of
*/
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
uint64_t pd_addr;
/* TODO: move that into ASIC specific code */
if (adev->asic_type >= CHIP_VEGA10) {
uint64_t flags = AMDGPU_PTE_VALID;
amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
pd_addr |= flags;
} else {
pd_addr = amdgpu_bo_gpu_offset(bo);
}
return pd_addr;
}
/**
* amdgpu_gmc_agp_addr - return the address in the AGP address space
*
* @tbo: TTM BO which needs the address, must be in GTT domain
*
* Tries to figure out how to access the BO through the AGP aperture. Returns
* AMDGPU_BO_INVALID_OFFSET if that is not possible.
*/
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_dma_tt *ttm;
if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
return AMDGPU_BO_INVALID_OFFSET;
ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
return AMDGPU_BO_INVALID_OFFSET;
return adev->gmc.agp_start + ttm->dma_address[0];
}
/**
* amdgpu_gmc_vram_location - try to find VRAM location
*
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * This function will try to place VRAM at the base address provided
 * as a parameter.
*/
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
if (limit && limit < mc->real_vram_size)
mc->real_vram_size = limit;
if (mc->xgmi.num_physical_nodes == 0) {
mc->fb_start = mc->vram_start;
mc->fb_end = mc->vram_end;
}
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
}
/**
* amdgpu_gmc_gart_location - try to find GART location
*
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * This function will try to place GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, then we adjust the GART size.
 * Thus this function will never fail.
*/
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
mc->gart_size += adev->pm.smu_prv_buffer_size;
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
size_bf = mc->fb_start;
size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->fb_end + 1, four_gb);
if (mc->gart_size > max(size_bf, size_af)) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = max(size_bf, size_af);
}
if ((size_bf >= mc->gart_size && size_bf < size_af) ||
(size_af < mc->gart_size))
mc->gart_start = 0;
else
mc->gart_start = mc->mc_mask - mc->gart_size + 1;
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
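The placement logic above can be exercised on its own. The standalone sketch below uses assumed, illustrative numbers (8 GiB of VRAM at offset 0, a 512 MiB GART, a 44-bit MC address space; none of these values come from the patch) and shows the GART landing at the top of the MC space, aligned down to a 4 GiB boundary.
#include <stdint.h>
#include <stdio.h>
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))
int main(void)
{
	const uint64_t four_gb = 0x100000000ULL;
	uint64_t mc_mask   = (1ULL << 44) - 1;		/* assumed 44-bit MC space */
	uint64_t fb_start  = 0;				/* assumed VRAM at offset 0 */
	uint64_t fb_end    = (8ULL << 30) - 1;		/* assumed 8 GiB of VRAM */
	uint64_t gart_size = 512ULL << 20;		/* assumed 512 MiB GART */
	uint64_t gart_start, gart_end;
	uint64_t size_bf = fb_start;					/* hole before the FB */
	uint64_t size_af = mc_mask + 1 - ALIGN_UP(fb_end + 1, four_gb);	/* hole after it */
	if (gart_size > (size_bf > size_af ? size_bf : size_af))
		gart_size = size_bf > size_af ? size_bf : size_af;	/* "limiting GART" clamp */
	if ((size_bf >= gart_size && size_bf < size_af) || size_af < gart_size)
		gart_start = 0;
	else
		gart_start = mc_mask - gart_size + 1;
	gart_start &= ~(four_gb - 1);
	gart_end = gart_start + gart_size - 1;
	printf("GART: %lluM 0x%016llX - 0x%016llX\n",
	       (unsigned long long)(gart_size >> 20),
	       (unsigned long long)gart_start, (unsigned long long)gart_end);
	return 0;
}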
/**
* amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * This function will try to find a place for the AGP BAR in the MC address
 * space.
*
* AGP BAR will be assigned the largest available hole in the address space.
* Should be called after VRAM and GART locations are setup.
*/
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t sixteen_gb = 1ULL << 34;
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
} else {
size_bf = mc->fb_start & sixteen_gb_mask;
size_af = (mc->gart_start & sixteen_gb_mask) -
ALIGN(mc->fb_end + 1, sixteen_gb);
}
if (size_bf > size_af) {
mc->agp_start = mc->fb_start > mc->gart_start ?
mc->gart_end + 1 : 0;
mc->agp_size = size_bf;
} else {
mc->agp_start = (mc->fb_start > mc->gart_start ?
mc->fb_end : mc->gart_end) + 1;
mc->agp_size = size_af;
}
mc->agp_start = ALIGN(mc->agp_start, sixteen_gb);
mc->agp_end = mc->agp_start + mc->agp_size - 1;
dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}
...@@ -30,6 +30,19 @@ ...@@ -30,6 +30,19 @@
#include "amdgpu_irq.h" #include "amdgpu_irq.h"
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
/*
 * Hardware is programmed as if the hole doesn't exist, using start and end
 * address values.
 *
 * This mask is used to remove the upper 16 bits of the VA and so come up with
 * the linear address value.
*/
#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
struct firmware; struct firmware;
/* /*
...@@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs { ...@@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs {
u64 *dst, u64 *flags); u64 *dst, u64 *flags);
}; };
struct amdgpu_xgmi {
/* from psp */
u64 device_id;
u64 hive_id;
/* fixed per family */
u64 node_segment_size;
/* physical node (0-3) */
unsigned physical_node_id;
/* number of nodes (0-4) */
unsigned num_physical_nodes;
/* gpu list in the same hive */
struct list_head head;
};
struct amdgpu_gmc { struct amdgpu_gmc {
resource_size_t aper_size; resource_size_t aper_size;
resource_size_t aper_base; resource_size_t aper_base;
...@@ -81,11 +108,22 @@ struct amdgpu_gmc { ...@@ -81,11 +108,22 @@ struct amdgpu_gmc {
* about vram size near mc fb location */ * about vram size near mc fb location */
u64 mc_vram_size; u64 mc_vram_size;
u64 visible_vram_size; u64 visible_vram_size;
u64 agp_size;
u64 agp_start;
u64 agp_end;
u64 gart_size; u64 gart_size;
u64 gart_start; u64 gart_start;
u64 gart_end; u64 gart_end;
u64 vram_start; u64 vram_start;
u64 vram_end; u64 vram_end;
/* FB region: the same as the local vram region on a single GPU. In an XGMI
 * configuration this region covers all GPUs in the same hive, and each GPU
 * in the hive has the same view of this FB region.
 * GPU0's vram starts at offset (0 * segment size),
 * GPU1 starts at offset (1 * segment size), etc.
*/
u64 fb_start;
u64 fb_end;
unsigned vram_width; unsigned vram_width;
u64 real_vram_size; u64 real_vram_size;
int vram_mtrr; int vram_mtrr;
...@@ -109,8 +147,17 @@ struct amdgpu_gmc { ...@@ -109,8 +147,17 @@ struct amdgpu_gmc {
atomic_t vm_fault_info_updated; atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs; const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
}; };
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
/** /**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
* *
...@@ -126,4 +173,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) ...@@ -126,4 +173,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
return (gmc->real_vram_size == gmc->visible_vram_size); return (gmc->real_vram_size == gmc->visible_vram_size);
} }
/**
* amdgpu_gmc_sign_extend - sign extend the given gmc address
*
* @addr: address to extend
*/
static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
{
if (addr >= AMDGPU_GMC_HOLE_START)
addr |= AMDGPU_GMC_HOLE_END;
return addr;
}
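Taken together, AMDGPU_GMC_HOLE_MASK and amdgpu_gmc_sign_extend() convert between the canonical 64-bit form of a high virtual address and the 48-bit linear value the hardware is programmed with. A small standalone illustration follows; the example address is arbitrary, and the constants mirror the macros added above.
#include <stdint.h>
#include <stdio.h>
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
#define AMDGPU_GMC_HOLE_END   0xffff800000000000ULL
#define AMDGPU_GMC_HOLE_MASK  0x0000ffffffffffffULL
static uint64_t sign_extend(uint64_t addr)
{
	if (addr >= AMDGPU_GMC_HOLE_START)
		addr |= AMDGPU_GMC_HOLE_END;
	return addr;
}
int main(void)
{
	uint64_t va = 0xffff800000400000ULL;		/* canonical high address from userspace */
	uint64_t linear = va & AMDGPU_GMC_HOLE_MASK;	/* value the GMC is programmed with */
	printf("canonical 0x%016llx -> linear 0x%016llx -> back 0x%016llx\n",
	       (unsigned long long)va, (unsigned long long)linear,
	       (unsigned long long)sign_extend(linear));
	return 0;
}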
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
#endif #endif
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "atom.h" #include "atom.h"
#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
...@@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
(amdgpu_sriov_vf(adev) && need_ctx_switch) || (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) { amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true; need_pipe_sync = true;
if (tmp)
trace_amdgpu_ib_pipe_sync(job, tmp);
dma_fence_put(tmp); dma_fence_put(tmp);
} }
......
...@@ -197,78 +197,3 @@ int amdgpu_ih_process(struct amdgpu_device *adev) ...@@ -197,78 +197,3 @@ int amdgpu_ih_process(struct amdgpu_device *adev)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/**
* amdgpu_ih_add_fault - Add a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a retry page fault interrupt is
* received. If this is a new page fault, it will be added to a hash
* table. The return value indicates whether this is a new fault, or
* a fault that was already known and is already being handled.
*
* If there are too many pending page faults, this will fail. Retry
* interrupts should be ignored in this case until there is enough
* free space.
*
* Returns 0 if the fault was added, 1 if the fault was already known,
* -ENOSPC if there are too many pending faults.
*/
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r = -ENOSPC;
if (WARN_ON_ONCE(!adev->irq.ih.faults))
/* Should be allocated in <IP>_ih_sw_init on GPUs that
* support retry faults and require retry filtering.
*/
return r;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
/* Only let the hash table fill up to 50% for best performance */
if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
goto unlock_out;
r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
if (!r)
adev->irq.ih.faults->count++;
/* chash_table_copy_in should never fail unless we're losing count */
WARN_ON_ONCE(r < 0);
unlock_out:
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
return r;
}
/**
* amdgpu_ih_clear_fault - Remove a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a page fault has been handled. Any
* future interrupt with this key will be processed as a new
* page fault.
*/
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r;
if (!adev->irq.ih.faults)
return;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
if (!WARN_ON_ONCE(r < 0)) {
adev->irq.ih.faults->count--;
WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
}
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
}
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#ifndef __AMDGPU_IH_H__ #ifndef __AMDGPU_IH_H__
#define __AMDGPU_IH_H__ #define __AMDGPU_IH_H__
#include <linux/chash.h>
#include "soc15_ih_clientid.h" #include "soc15_ih_clientid.h"
struct amdgpu_device; struct amdgpu_device;
...@@ -32,13 +31,6 @@ struct amdgpu_device; ...@@ -32,13 +31,6 @@ struct amdgpu_device;
#define AMDGPU_IH_CLIENTID_LEGACY 0 #define AMDGPU_IH_CLIENTID_LEGACY 0
#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX #define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
spinlock_t lock;
int count;
};
/* /*
* R6xx+ IH ring * R6xx+ IH ring
*/ */
...@@ -57,7 +49,6 @@ struct amdgpu_ih_ring { ...@@ -57,7 +49,6 @@ struct amdgpu_ih_ring {
bool use_doorbell; bool use_doorbell;
bool use_bus_addr; bool use_bus_addr;
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
struct amdgpu_retryfault_hashtable *faults;
}; };
#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
...@@ -76,11 +67,24 @@ struct amdgpu_iv_entry { ...@@ -76,11 +67,24 @@ struct amdgpu_iv_entry {
const uint32_t *iv_entry; const uint32_t *iv_entry;
}; };
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
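For orientation, this is roughly how the wrappers above fit together in the top-level interrupt handler. It is only a sketch loosely modeled on amdgpu_ih_process() and is not compilable on its own: the function name is hypothetical, locking, overflow handling and the dispatch to registered IRQ sources are omitted, and it assumes the prescreen/decode callbacks advance the ring's read pointer themselves.
static void example_ih_process(struct amdgpu_device *adev)
{
	struct amdgpu_iv_entry entry;
	u32 wptr = amdgpu_ih_get_wptr(adev);
	while (adev->irq.ih.rptr != wptr) {
		/* IP-specific prescreening may drop high-frequency entries
		 * (e.g. repeated retry page faults) and skip past them. */
		if (amdgpu_ih_prescreen_iv(adev)) {
			amdgpu_ih_decode_iv(adev, &entry);
			/* ... hand the decoded entry to the IRQ dispatch here ... */
		}
		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
	}
	amdgpu_ih_set_rptr(adev);
}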
int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
bool use_bus_addr); bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev); void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
int amdgpu_ih_process(struct amdgpu_device *adev); int amdgpu_ih_process(struct amdgpu_device *adev);
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
#endif #endif
...@@ -105,8 +105,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) ...@@ -105,8 +105,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device, struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work); reset_work);
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL, false); amdgpu_device_gpu_recover(adev, NULL);
} }
/** /**
......
...@@ -33,11 +33,18 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) ...@@ -33,11 +33,18 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job); struct amdgpu_job *job = to_amdgpu_job(s_job);
if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
s_job->sched->name);
return;
}
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq); ring->fence_drv.sync_seq);
amdgpu_device_gpu_recover(ring->adev, job, false); if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
} }
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
...@@ -66,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -66,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync); amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
return 0; return 0;
} }
...@@ -82,8 +90,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, ...@@ -82,8 +90,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
if (r) if (r)
kfree(*job); kfree(*job);
else
(*job)->vm_pd_addr = adev->gart.table_addr;
return r; return r;
} }
......
...@@ -51,18 +51,6 @@ ...@@ -51,18 +51,6 @@
* *
*/ */
static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
if (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))
return false;
return true;
}
/** /**
* amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
* *
...@@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) ...@@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (domain & AMDGPU_GEM_DOMAIN_GTT) { if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0; places[c].fpfn = 0;
if (flags & AMDGPU_GEM_CREATE_SHADOW) places[c].lpfn = 0;
places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
else
places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT; places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
places[c].flags |= TTM_PL_FLAG_WC | places[c].flags |= TTM_PL_FLAG_WC |
...@@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, ...@@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bool free = false; bool free = false;
int r; int r;
if (!size) {
amdgpu_bo_unref(bo_ptr);
return 0;
}
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
bp.size = size; bp.size = size;
bp.byte_align = align; bp.byte_align = align;
...@@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, ...@@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
if (r) if (r)
return r; return r;
amdgpu_bo_unreserve(*bo_ptr); if (*bo_ptr)
amdgpu_bo_unreserve(*bo_ptr);
return 0; return 0;
} }
...@@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, ...@@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
int r; int r;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE); if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA))
size <<= PAGE_SHIFT;
else
size = ALIGN(size, PAGE_SIZE);
if (!amdgpu_bo_validate_size(adev, size, bp->domain)) if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM; return -ENOMEM;
...@@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, ...@@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return -ENOMEM; return -ENOMEM;
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va); bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain; bp->domain;
bo->allowed_domains = bo->preferred_domains; bo->allowed_domains = bo->preferred_domains;
...@@ -541,7 +536,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, ...@@ -541,7 +536,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
} }
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align, unsigned long size,
struct amdgpu_bo *bo) struct amdgpu_bo *bo)
{ {
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
...@@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, ...@@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
bp.size = size; bp.size = size;
bp.byte_align = byte_align;
bp.domain = AMDGPU_GEM_DOMAIN_GTT; bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW; AMDGPU_GEM_CREATE_SHADOW;
...@@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, ...@@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (!r) { if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo); bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_add_tail(&bo->shadow_list, &adev->shadow_list); list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
} }
...@@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev, ...@@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (r) if (r)
return r; return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) { if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv) if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL)); NULL));
r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv) if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv); reservation_object_unlock((*bo_ptr)->tbo.resv);
...@@ -695,13 +689,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) ...@@ -695,13 +689,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
} }
/** /**
* amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
* @adev: amdgpu device object *
* @ring: amdgpu_ring for the engine handling the buffer operations * @shadow: &amdgpu_bo shadow to be restored
* @bo: &amdgpu_bo buffer to be restored
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation * @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
* *
* Copies a buffer object's shadow content back to the object. * Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu * This is used for recovering a buffer from its shadow in case of a gpu
...@@ -710,36 +701,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) ...@@ -710,36 +701,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
* Returns: * Returns:
* 0 for success or a negative error code on failure. * 0 for success or a negative error code on failure.
*/ */
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct)
{ {
struct amdgpu_bo *shadow = bo->shadow; struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
uint64_t bo_addr, shadow_addr; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
int r; uint64_t shadow_addr, parent_addr;
if (!shadow)
return -EINVAL;
bo_addr = amdgpu_bo_gpu_offset(bo); shadow_addr = amdgpu_bo_gpu_offset(shadow);
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
r = reservation_object_reserve_shared(bo->tbo.resv); return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
if (r) amdgpu_bo_size(shadow), NULL, fence,
goto err; true, false);
r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
amdgpu_bo_size(bo), resv, fence,
direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
err:
return r;
} }
/** /**
...@@ -1019,10 +993,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) ...@@ -1019,10 +993,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
int amdgpu_bo_evict_vram(struct amdgpu_device *adev) int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{ {
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
if (0 && (adev->flags & AMD_IS_APU)) { #ifndef CONFIG_HIBERNATION
if (adev->flags & AMD_IS_APU) {
/* Useless to evict on IGP chips */ /* Useless to evict on IGP chips */
return 0; return 0;
} }
#endif
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
} }
...@@ -1360,15 +1336,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, ...@@ -1360,15 +1336,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{ {
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
!bo->pin_count); !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
return bo->tbo.offset; return amdgpu_gmc_sign_extend(bo->tbo.offset);
} }
/** /**
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, ...@@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo; struct amdgpu_sa_bo *sa_bo;
fences[i] = NULL;
if (list_empty(&sa_manager->flist[i])) if (list_empty(&sa_manager->flist[i]))
continue; continue;
...@@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, ...@@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock); spin_lock(&sa_manager->wq.lock);
do { do {
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
fences[i] = NULL;
tries[i] = 0; tries[i] = 0;
}
do { do {
amdgpu_sa_bo_try_free(sa_manager); amdgpu_sa_bo_try_free(sa_manager);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment