Commit a0a53aa8 authored by Samuel Li, committed by Alex Deucher

drm/radeon: Use direct mapping for fast fb access on RS690

This patch allows the CPU to map the stolen VRAM segment
directly rather than going through the PCI BAR.  This
significantly improves performance for certain workloads when
used with a properly patched DDX (the userspace X.Org driver).

Use radeon.fastfb=1 to enable it (disabled by default).
Currently only supported on RS690, but support for RS780/880
and newer APUs may be added eventually.
Signed-off-by: Samuel Li <samuel.li@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 7c1c7c18
...@@ -95,6 +95,7 @@ extern int radeon_hw_i2c; ...@@ -95,6 +95,7 @@ extern int radeon_hw_i2c;
extern int radeon_pcie_gen2; extern int radeon_pcie_gen2;
extern int radeon_msi; extern int radeon_msi;
extern int radeon_lockup_timeout; extern int radeon_lockup_timeout;
extern int radeon_fastfb;
/* /*
* Copy from radeon_drv.h so we don't have to include both and have conflicting * Copy from radeon_drv.h so we don't have to include both and have conflicting
...@@ -1616,6 +1617,7 @@ struct radeon_device { ...@@ -1616,6 +1617,7 @@ struct radeon_device {
bool suspend; bool suspend;
bool need_dma32; bool need_dma32;
bool accel_working; bool accel_working;
bool fastfb_working; /* IGP feature*/
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */ const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */ const struct firmware *pfp_fw; /* r6/700 PFP firmware */
......
...@@ -71,9 +71,10 @@ ...@@ -71,9 +71,10 @@
* 2.28.0 - r600-eg: Add MEM_WRITE packet support * 2.28.0 - r600-eg: Add MEM_WRITE packet support
* 2.29.0 - R500 FP16 color clear registers * 2.29.0 - R500 FP16 color clear registers
* 2.30.0 - fix for FMASK texturing * 2.30.0 - fix for FMASK texturing
* 2.31.0 - Add fastfb support for rs690
*/ */
#define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MAJOR 2
#define KMS_DRIVER_MINOR 30 #define KMS_DRIVER_MINOR 31
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev); int radeon_driver_unload_kms(struct drm_device *dev);
...@@ -160,6 +161,7 @@ int radeon_hw_i2c = 0; ...@@ -160,6 +161,7 @@ int radeon_hw_i2c = 0;
int radeon_pcie_gen2 = -1; int radeon_pcie_gen2 = -1;
int radeon_msi = -1; int radeon_msi = -1;
int radeon_lockup_timeout = 10000; int radeon_lockup_timeout = 10000;
int radeon_fastfb = 0;
MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers"); MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
module_param_named(no_wb, radeon_no_wb, int, 0444); module_param_named(no_wb, radeon_no_wb, int, 0444);
...@@ -212,6 +214,9 @@ module_param_named(msi, radeon_msi, int, 0444); ...@@ -212,6 +214,9 @@ module_param_named(msi, radeon_msi, int, 0444);
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)"); MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)");
module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444); module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);
MODULE_PARM_DESC(fastfb, "Direct FB access for IGP chips (0 = disable, 1 = enable)");
module_param_named(fastfb, radeon_fastfb, int, 0444);
static struct pci_device_id pciidlist[] = { static struct pci_device_id pciidlist[] = {
radeon_PCI_IDS radeon_PCI_IDS
}; };
......
...@@ -376,6 +376,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -376,6 +376,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
else else
return -EINVAL; return -EINVAL;
break; break;
case RADEON_INFO_FASTFB_WORKING:
value = rdev->fastfb_working;
break;
default: default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request); DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL; return -EINVAL;
......
...@@ -321,8 +321,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev) ...@@ -321,8 +321,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev)
int radeon_bo_init(struct radeon_device *rdev) int radeon_bo_init(struct radeon_device *rdev)
{ {
/* Add an MTRR for the VRAM */ /* Add an MTRR for the VRAM */
rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, if (!rdev->fastfb_working) {
rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
MTRR_TYPE_WRCOMB, 1); MTRR_TYPE_WRCOMB, 1);
}
DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
rdev->mc.mc_vram_size >> 20, rdev->mc.mc_vram_size >> 20,
(unsigned long long)rdev->mc.aper_size >> 20); (unsigned long long)rdev->mc.aper_size >> 20);
......
...@@ -148,6 +148,8 @@ void rs690_pm_info(struct radeon_device *rdev) ...@@ -148,6 +148,8 @@ void rs690_pm_info(struct radeon_device *rdev)
static void rs690_mc_init(struct radeon_device *rdev) static void rs690_mc_init(struct radeon_device *rdev)
{ {
u64 base; u64 base;
uint32_t h_addr, l_addr;
unsigned long long k8_addr;
rs400_gart_adjust_size(rdev); rs400_gart_adjust_size(rdev);
rdev->mc.vram_is_ddr = true; rdev->mc.vram_is_ddr = true;
...@@ -160,6 +162,27 @@ static void rs690_mc_init(struct radeon_device *rdev) ...@@ -160,6 +162,27 @@ static void rs690_mc_init(struct radeon_device *rdev)
base = RREG32_MC(R_000100_MCCFG_FB_LOCATION); base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
base = G_000100_MC_FB_START(base) << 16; base = G_000100_MC_FB_START(base) << 16;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
/* Use K8 direct mapping for fast fb access. */
rdev->fastfb_working = false;
h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)
#endif
{
/* FastFB shall be used with UMA memory. Here it is simply disabled when sideport
* memory is present.
*/
if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n",
(unsigned long long)rdev->mc.aper_base, k8_addr);
rdev->mc.aper_base = (resource_size_t)k8_addr;
rdev->fastfb_working = true;
}
}
rs690_pm_info(rdev); rs690_pm_info(rdev);
radeon_vram_location(rdev, &rdev->mc, base); radeon_vram_location(rdev, &rdev->mc, base);
rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1; rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
......
...@@ -29,6 +29,9 @@ ...@@ -29,6 +29,9 @@
#define __RS690D_H__ #define __RS690D_H__
/* Registers */ /* Registers */
#define R_00001E_K8_FB_LOCATION 0x00001E
#define R_00005F_MC_MISC_UMA_CNTL 0x00005F
#define G_00005F_K8_ADDR_EXT(x) (((x) >> 0) & 0xFF)
#define R_000078_MC_INDEX 0x000078 #define R_000078_MC_INDEX 0x000078
#define S_000078_MC_IND_ADDR(x) (((x) & 0x1FF) << 0) #define S_000078_MC_IND_ADDR(x) (((x) & 0x1FF) << 0)
#define G_000078_MC_IND_ADDR(x) (((x) >> 0) & 0x1FF) #define G_000078_MC_IND_ADDR(x) (((x) >> 0) & 0x1FF)
......
...@@ -972,6 +972,9 @@ struct drm_radeon_cs { ...@@ -972,6 +972,9 @@ struct drm_radeon_cs {
#define RADEON_INFO_MAX_SE 0x12 #define RADEON_INFO_MAX_SE 0x12
/* max SH per SE */ /* max SH per SE */
#define RADEON_INFO_MAX_SH_PER_SE 0x13 #define RADEON_INFO_MAX_SH_PER_SE 0x13
/* fast fb access is enabled */
#define RADEON_INFO_FASTFB_WORKING 0x14
struct drm_radeon_info { struct drm_radeon_info {
uint32_t request; uint32_t request;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment