Commit 582e2ce5 authored by Alex Deucher's avatar Alex Deucher

drm/amdgpu/display: FP fixes for DCN3.x (v4)

Add proper FP_START/END handling and adjust Makefiles per
previous asics.

v2: fix up harder.
v3: fix clkmgr Makefile for dcn30
v4: fix old gcc handling is only required for x86
Build-tested-by: default avatarNathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com> (v1)
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> (v1)
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 20f2ffe5
...@@ -119,6 +119,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ...@@ -119,6 +119,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
############################################################################### ###############################################################################
CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to VanGogh APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
# prevent build errors:
# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
# this file is unused on arm64, just like on ppc64
ifdef CONFIG_ARM64
CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only
endif
AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
...@@ -127,6 +140,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) ...@@ -127,6 +140,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
############################################################################### ###############################################################################
CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to VanGogh APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
# prevent build errors:
# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
# this file is unused on arm64, just like on ppc64
ifdef CONFIG_ARM64
CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only
endif
AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
......
...@@ -104,7 +104,7 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk ...@@ -104,7 +104,7 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk
} }
} }
static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
{ {
/* defaults */ /* defaults */
double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
...@@ -211,7 +211,9 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) ...@@ -211,7 +211,9 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base); clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
/* WM range table */ /* WM range table */
DC_FP_START();
dcn3_build_wm_range_table(clk_mgr); dcn3_build_wm_range_table(clk_mgr);
DC_FP_END();
} }
static int dcn30_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) static int dcn30_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
......
...@@ -52,6 +52,7 @@ IS_OLD_GCC = 1 ...@@ -52,6 +52,7 @@ IS_OLD_GCC = 1
endif endif
endif endif
ifdef CONFIG_X86
ifdef IS_OLD_GCC ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution. # Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
...@@ -62,6 +63,7 @@ else ...@@ -62,6 +63,7 @@ else
CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2
CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2
endif endif
endif
AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))
......
...@@ -1469,7 +1469,19 @@ int dcn30_populate_dml_pipes_from_context( ...@@ -1469,7 +1469,19 @@ int dcn30_populate_dml_pipes_from_context(
return pipe_cnt; return pipe_cnt;
} }
void dcn30_populate_dml_writeback_from_context( /*
* This must be noinline to ensure anything that deals with FP registers
* is contained within this call; previously our compiling with hard-float
* would result in fp instructions being emitted outside of the boundaries
* of the DC_FP_START/END macros, which makes sense as the compiler has no
* idea about what is wrapped and what is not
*
* This is largely just a workaround to avoid breakage introduced with 5.6,
* ideally all fp-using code should be moved into its own file, only that
* should be compiled with hard-float, and all code exported from there
* should be strictly wrapped with DC_FP_START/END
*/
static noinline void dcn30_populate_dml_writeback_from_context_fp(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{ {
int pipe_cnt, i, j; int pipe_cnt, i, j;
...@@ -1558,6 +1570,14 @@ void dcn30_populate_dml_writeback_from_context( ...@@ -1558,6 +1570,14 @@ void dcn30_populate_dml_writeback_from_context(
} }
void dcn30_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
DC_FP_START();
dcn30_populate_dml_writeback_from_context_fp(dc, res_ctx, pipes);
DC_FP_END();
}
unsigned int dcn30_calc_max_scaled_time( unsigned int dcn30_calc_max_scaled_time(
unsigned int time_per_pixel, unsigned int time_per_pixel,
enum mmhubbub_wbif_mode mode, enum mmhubbub_wbif_mode mode,
...@@ -2204,7 +2224,19 @@ static bool dcn30_internal_validate_bw( ...@@ -2204,7 +2224,19 @@ static bool dcn30_internal_validate_bw(
return out; return out;
} }
void dcn30_calculate_wm_and_dlg( /*
* This must be noinline to ensure anything that deals with FP registers
* is contained within this call; previously our compiling with hard-float
* would result in fp instructions being emitted outside of the boundaries
* of the DC_FP_START/END macros, which makes sense as the compiler has no
* idea about what is wrapped and what is not
*
* This is largely just a workaround to avoid breakage introduced with 5.6,
* ideally all fp-using code should be moved into its own file, only that
* should be compiled with hard-float, and all code exported from there
* should be strictly wrapped with DC_FP_START/END
*/
static noinline void dcn30_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context, struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes, display_e2e_pipe_params_st *pipes,
int pipe_cnt, int pipe_cnt,
...@@ -2360,7 +2392,18 @@ void dcn30_calculate_wm_and_dlg( ...@@ -2360,7 +2392,18 @@ void dcn30_calculate_wm_and_dlg(
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
} }
bool dcn30_validate_bandwidth(struct dc *dc, void dcn30_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int vlevel)
{
DC_FP_START();
dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
DC_FP_END();
}
static noinline bool dcn30_validate_bandwidth_fp(struct dc *dc,
struct dc_state *context, struct dc_state *context,
bool fast_validate) bool fast_validate)
{ {
...@@ -2411,7 +2454,20 @@ bool dcn30_validate_bandwidth(struct dc *dc, ...@@ -2411,7 +2454,20 @@ bool dcn30_validate_bandwidth(struct dc *dc,
return out; return out;
} }
static void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, bool dcn30_validate_bandwidth(struct dc *dc,
struct dc_state *context,
bool fast_validate)
{
bool out;
DC_FP_START();
out = dcn30_validate_bandwidth_fp(dc, context, fast_validate);
DC_FP_END();
return out;
}
static noinline void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
unsigned int *optimal_dcfclk, unsigned int *optimal_dcfclk,
unsigned int *optimal_fclk) unsigned int *optimal_fclk)
{ {
...@@ -2478,8 +2534,10 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ...@@ -2478,8 +2534,10 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
// Calculate optimal dcfclk for each uclk // Calculate optimal dcfclk for each uclk
for (i = 0; i < num_uclk_states; i++) { for (i = 0; i < num_uclk_states; i++) {
DC_FP_START();
get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
&optimal_dcfclk_for_uclk[i], NULL); &optimal_dcfclk_for_uclk[i], NULL);
DC_FP_END();
if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
} }
...@@ -2583,6 +2641,8 @@ static bool dcn30_resource_construct( ...@@ -2583,6 +2641,8 @@ static bool dcn30_resource_construct(
struct irq_service_init_data init_data; struct irq_service_init_data init_data;
struct ddc_service_init_data ddc_init_data; struct ddc_service_init_data ddc_init_data;
DC_FP_START();
ctx->dc_bios->regs = &bios_regs; ctx->dc_bios->regs = &bios_regs;
pool->base.res_cap = &res_cap_dcn3; pool->base.res_cap = &res_cap_dcn3;
...@@ -2860,10 +2920,13 @@ static bool dcn30_resource_construct( ...@@ -2860,10 +2920,13 @@ static bool dcn30_resource_construct(
pool->base.oem_device = NULL; pool->base.oem_device = NULL;
} }
DC_FP_END();
return true; return true;
create_fail: create_fail:
DC_FP_END();
dcn30_resource_destruct(pool); dcn30_resource_destruct(pool);
return false; return false;
......
...@@ -64,6 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) ...@@ -64,6 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
...@@ -71,8 +74,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflag ...@@ -71,8 +74,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflag
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
endif endif
CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment