Commit 410e7474 authored by Alvin Lee's avatar Alvin Lee Committed by Alex Deucher

drm/amd/display: Refactor SubVP calculation to remove FPU

Refactor calculation to remove floating point operations from dmub_srv.
To ensure that 32-bit compilation works well, we use the div64 family of
macros to do integer division for SubVP-related timing parameters.

Cc: Maíra Canal <mairacanal@riseup.net>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Isabella Basso <isabbasso@riseup.net>
Cc: Magali Lemes <magalilemes00@gmail.com>
Tested-by: default avatarDaniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: default avatarSamson Tam <Samson.Tam@amd.com>
Acked-by: default avatarTom Chung <chiahsuan.chung@amd.com>
Signed-off-by: default avatarAlvin Lee <alvin.lee2@amd.com>
Co-developed-by: default avatarAurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: default avatarAurabindo Pillai <aurabindo.pillai@amd.com>
Co-developed-by: default avatarRodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: default avatarRodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent b04fa59a
...@@ -450,44 +450,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc, ...@@ -450,44 +450,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing; struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
int16_t drr_frame_us = 0; uint16_t drr_frame_us = 0;
int16_t min_drr_supported_us = 0; uint16_t min_drr_supported_us = 0;
int16_t max_drr_supported_us = 0; uint16_t max_drr_supported_us = 0;
int16_t max_drr_vblank_us = 0; uint16_t max_drr_vblank_us = 0;
int16_t max_drr_mallregion_us = 0; uint16_t max_drr_mallregion_us = 0;
int16_t mall_region_us = 0; uint16_t mall_region_us = 0;
int16_t prefetch_us = 0; uint16_t prefetch_us = 0;
int16_t subvp_active_us = 0; uint16_t subvp_active_us = 0;
int16_t drr_active_us = 0; uint16_t drr_active_us = 0;
int16_t min_vtotal_supported = 0; uint16_t min_vtotal_supported = 0;
int16_t max_vtotal_supported = 0; uint16_t max_vtotal_supported = 0;
pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true; pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true;
pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping
pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now
drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total, drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000),
(int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000); (((uint64_t)drr_timing->pix_clk_100hz * 100)));
// P-State allow width and FW delays already included phantom_timing->v_addressable // P-State allow width and FW delays already included phantom_timing->v_addressable
mall_region_us = div64_s64(phantom_timing->v_addressable * phantom_timing->h_total, mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000),
(int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000); (((uint64_t)phantom_timing->pix_clk_100hz * 100)));
min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us),
(div64_s64((int64_t)min_drr_supported_us, 1000000)), (((uint64_t)drr_timing->h_total * 1000000)));
(int64_t)drr_timing->h_total);
prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000),
prefetch_us = div64_s64((phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total, (((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
(int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000 + subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000),
dc->caps.subvp_prefetch_end_to_mall_start_us); (((uint64_t)main_timing->pix_clk_100hz * 100)));
subvp_active_us = div64_s64(main_timing->v_addressable * main_timing->h_total, drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000),
(int64_t)(main_timing->pix_clk_100hz * 100) * 1000000); (((uint64_t)drr_timing->pix_clk_100hz * 100)));
drr_active_us = div64_s64(drr_timing->v_addressable * drr_timing->h_total, max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
(int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000);
max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us; max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us;
max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us; max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us;
max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * (div64_s64((int64_t)max_drr_supported_us, 1000000)), max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us),
(int64_t)drr_timing->h_total); (((uint64_t)drr_timing->h_total * 1000000)));
pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
...@@ -581,10 +579,12 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, ...@@ -581,10 +579,12 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing; struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - phantom_timing0->v_front_porch) * phantom_timing0->h_total, subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
(int64_t)(phantom_timing0->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); (uint64_t)phantom_timing0->h_total * 1000000),
subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - phantom_timing1->v_front_porch) * phantom_timing1->h_total, (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
(int64_t)(phantom_timing1->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) *
(uint64_t)phantom_timing1->h_total * 1000000),
(((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
// Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time) // Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time)
// should increase it's prefetch time to match the other // should increase it's prefetch time to match the other
...@@ -592,16 +592,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, ...@@ -592,16 +592,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1]; pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1];
prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us; prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us;
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
(phantom_timing1->pix_clk_100hz * 100) + phantom_timing1->h_total - 1), ((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)),
(int64_t)phantom_timing1->h_total); ((uint64_t)phantom_timing1->h_total * 1000000));
} else if (subvp1_prefetch_us > subvp0_prefetch_us) { } else if (subvp1_prefetch_us > subvp0_prefetch_us) {
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0]; pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0];
prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us; prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us;
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
(phantom_timing0->pix_clk_100hz * 100) + phantom_timing0->h_total - 1), ((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)),
(int64_t)phantom_timing0->h_total); ((uint64_t)phantom_timing0->h_total * 1000000));
} }
} }
...@@ -668,13 +669,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, ...@@ -668,13 +669,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
// Round up // Round up
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 1000000)) * div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
(phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
(int64_t)phantom_timing->h_total);
pipe_data->pipe_config.subvp_data.processing_delay_lines = pipe_data->pipe_config.subvp_data.processing_delay_lines =
div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 1000000)) * div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
(phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
(int64_t)phantom_timing->h_total);
// Find phantom pipe index based on phantom stream // Find phantom pipe index based on phantom stream
for (j = 0; j < dc->res_pool->pipe_count; j++) { for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j]; struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment