Commit 619ac0da authored by James Fargher's avatar James Fargher

Merge branch 'optimize-deployment-finder' into 'master'

Optimize Deployment API that filters by updated_at [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!59771
parents a4e70544 9c7a8911
......@@ -16,14 +16,25 @@
class DeploymentsFinder
attr_reader :params
# Warning:
# These const are directly used in Deployment Rest API, thus
# modifying these values could implicity change the API interface or introduce a breaking change.
# Also, if you add a sort value, make sure that the new query will stay
# performant with the other filtering/sorting parameters.
# The composed query could be significantly slower when the filtering and sorting columns are different.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/325627 for example.
ALLOWED_SORT_VALUES = %w[id iid created_at updated_at ref finished_at].freeze
DEFAULT_SORT_VALUE = 'id'
ALLOWED_SORT_DIRECTIONS = %w[asc desc].freeze
DEFAULT_SORT_DIRECTION = 'asc'
InefficientQueryError = Class.new(StandardError)
def initialize(params = {})
@params = params
validate!
end
def execute
......@@ -38,6 +49,32 @@ class DeploymentsFinder
private
def validate!
if filter_by_updated_at? && filter_by_finished_at?
raise InefficientQueryError, 'Both `updated_at` filter and `finished_at` filter can not be specified'
end
# Currently, the inefficient parameters are allowed in order to avoid breaking changes in Deployment API.
# We'll switch to a hard error in https://gitlab.com/gitlab-org/gitlab/-/issues/328500.
if (filter_by_updated_at? && !order_by_updated_at?) || (!filter_by_updated_at? && order_by_updated_at?)
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(
InefficientQueryError.new('`updated_at` filter and `updated_at` sorting must be paired')
)
end
if (filter_by_finished_at? && !order_by_finished_at?) || (!filter_by_finished_at? && order_by_finished_at?)
raise InefficientQueryError, '`finished_at` filter and `finished_at` sorting must be paired'
end
if filter_by_finished_at? && !filter_by_successful_deployment?
raise InefficientQueryError, '`finished_at` filter must be combined with `success` status filter.'
end
if params[:environment].present? && !params[:project].present?
raise InefficientQueryError, '`environment` filter must be combined with `project` scope.'
end
end
def init_collection
if params[:project].present?
params[:project].deployments
......@@ -49,6 +86,8 @@ class DeploymentsFinder
end
def sort(items)
sort_params = build_sort_params
optimize_sort_params!(sort_params)
items.order(sort_params) # rubocop: disable CodeReuse/ActiveRecord
end
......@@ -67,8 +106,8 @@ class DeploymentsFinder
end
def by_environment(items)
if params[:environment].present?
items.for_environment_name(params[:environment])
if params[:project].present? && params[:environment].present?
items.for_environment_name(params[:project], params[:environment])
else
items
end
......@@ -84,14 +123,58 @@ class DeploymentsFinder
items.for_status(params[:status])
end
def sort_params
def build_sort_params
order_by = ALLOWED_SORT_VALUES.include?(params[:order_by]) ? params[:order_by] : DEFAULT_SORT_VALUE
order_direction = ALLOWED_SORT_DIRECTIONS.include?(params[:sort]) ? params[:sort] : DEFAULT_SORT_DIRECTION
{ order_by => order_direction }.tap do |sort_values|
sort_values['id'] = 'desc' if sort_values['updated_at']
sort_values['id'] = sort_values.delete('created_at') if sort_values['created_at'] # Sorting by `id` produces the same result as sorting by `created_at`
{ order_by => order_direction }
end
def optimize_sort_params!(sort_params)
# Implicitly enforce the ordering when filtered by `updated_at` column for performance optimization.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/325627#note_552417509.
# We remove this in https://gitlab.com/gitlab-org/gitlab/-/issues/328500.
if filter_by_updated_at? && implicitly_enforce_ordering_for_updated_at_filter?
sort_params.replace('updated_at' => sort_params.each_value.first)
end
if sort_params['created_at'] || sort_params['iid']
# Sorting by `id` produces the same result as sorting by `created_at` or `iid`
sort_params.replace(id: sort_params.each_value.first)
elsif sort_params['updated_at']
# This adds the order as a tie-breaker when multiple rows have the same updated_at value.
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/20848.
sort_params.merge!(id: :desc)
end
end
def filter_by_updated_at?
params[:updated_before].present? || params[:updated_after].present?
end
def filter_by_finished_at?
params[:finished_before].present? || params[:finished_after].present?
end
def filter_by_successful_deployment?
params[:status].to_s == 'success'
end
def order_by_updated_at?
params[:order_by].to_s == 'updated_at'
end
def order_by_finished_at?
params[:order_by].to_s == 'finished_at'
end
def implicitly_enforce_ordering_for_updated_at_filter?
return false unless params[:project].present?
::Feature.enabled?(
:deployments_finder_implicitly_enforce_ordering_for_updated_at_filter,
params[:project],
default_enabled: :yaml)
end
# rubocop: disable CodeReuse/ActiveRecord
......
......@@ -32,8 +32,8 @@ class Deployment < ApplicationRecord
delegate :kubernetes_namespace, to: :deployment_cluster, allow_nil: true
scope :for_environment, -> (environment) { where(environment_id: environment) }
scope :for_environment_name, -> (name) do
joins(:environment).where(environments: { name: name })
scope :for_environment_name, -> (project, name) do
where(environment_id: Environment.select(:id).where(project: project, name: name))
end
scope :for_status, -> (status) { where(status: status) }
......
---
title: Recreate index for deployments updated_at and finished_at
merge_request: 59771
author:
type: performance
---
name: deployments_finder_implicitly_enforce_ordering_for_updated_at_filter
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59771
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/329286
milestone: '13.12'
type: development
group: group::release
default_enabled: false
# frozen_string_literal: true
# This migration recreates the index that introduced in 20210326035553_add_index_for_project_deployments_with_environment_id_and_updated_at.rb.
class RecreateIndexForProjectDeploymentsWithEnvironmentIdAndDateAt < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
OLD_INDEX_NAME = 'index_deployments_on_project_and_environment_and_updated_at'
NEW_INDEX_NAME = 'index_deployments_on_project_and_environment_and_updated_at_id'
def up
add_concurrent_index :deployments, [:project_id, :environment_id, :updated_at, :id], name: NEW_INDEX_NAME
remove_concurrent_index_by_name :deployments, OLD_INDEX_NAME
end
def down
add_concurrent_index :deployments, [:project_id, :environment_id, :updated_at], name: OLD_INDEX_NAME
remove_concurrent_index_by_name :deployments, NEW_INDEX_NAME
end
end
b7f75e3b443bfcb1aea812ad1682a31a99021f41ef4d47bdf600437db6f4f2f3
\ No newline at end of file
......@@ -22754,7 +22754,7 @@ CREATE INDEX index_deployments_on_id_and_status_and_created_at ON deployments US
CREATE INDEX index_deployments_on_id_where_cluster_id_present ON deployments USING btree (id) WHERE (cluster_id IS NOT NULL);
CREATE INDEX index_deployments_on_project_and_environment_and_updated_at ON deployments USING btree (project_id, environment_id, updated_at);
CREATE INDEX index_deployments_on_project_and_environment_and_updated_at_id ON deployments USING btree (project_id, environment_id, updated_at, id);
CREATE INDEX index_deployments_on_project_and_finished ON deployments USING btree (project_id, finished_at) WHERE (status = 2);
......@@ -49,7 +49,9 @@ module API
project: user_project,
environment: environment_name,
finished_after: start_date,
finished_before: end_date
finished_before: end_date,
status: :success,
order_by: :finished_at
).execute
end
end
......
......@@ -29,7 +29,7 @@ module Gitlab
# rubocop: disable CodeReuse/ActiveRecord
def deployment_count_via_finder
deployments = DeploymentsFinder
.new(group: group, finished_after: options[:from], finished_before: options[:to], status: :success)
.new(group: group, finished_after: options[:from], finished_before: options[:to], status: :success, order_by: :finished_at)
.execute
deployments = deployments.where(project_id: options[:projects]) if options[:projects].present?
......
......@@ -41,6 +41,8 @@ module API
.execute.with_api_entity_associations
present paginate(deployments), with: Entities::Deployment
rescue DeploymentsFinder::InefficientQueryError => e
bad_request!(e.message)
end
desc 'Gets a specific deployment' do
......
......@@ -16,7 +16,7 @@ module Gitlab
def deployments_count
DeploymentsFinder
.new(project: @project, finished_after: @from, finished_before: @to, status: :success)
.new(project: @project, finished_after: @from, finished_before: @to, status: :success, order_by: :finished_at)
.execute
.count
end
......
......@@ -5,6 +5,58 @@ require 'spec_helper'
RSpec.describe DeploymentsFinder do
subject { described_class.new(params).execute }
describe "validation" do
context 'when both updated_at and finished_at filters are specified' do
let(:params) { { updated_before: 1.day.ago, finished_before: 1.day.ago } }
it 'raises an error' do
expect { subject }.to raise_error(
described_class::InefficientQueryError,
'Both `updated_at` filter and `finished_at` filter can not be specified')
end
end
context 'when updated_at filter and id sorting' do
let(:params) { { updated_before: 1.day.ago, order_by: :id } }
it 'raises an error' do
expect { subject }.to raise_error(
described_class::InefficientQueryError,
'`updated_at` filter and `updated_at` sorting must be paired')
end
end
context 'when finished_at filter and id sorting' do
let(:params) { { finished_before: 1.day.ago, order_by: :id } }
it 'raises an error' do
expect { subject }.to raise_error(
described_class::InefficientQueryError,
'`finished_at` filter and `finished_at` sorting must be paired')
end
end
context 'when finished_at filter with failed status filter' do
let(:params) { { finished_before: 1.day.ago, order_by: :finished_at, status: :failed } }
it 'raises an error' do
expect { subject }.to raise_error(
described_class::InefficientQueryError,
'`finished_at` filter must be combined with `success` status filter.')
end
end
context 'when environment filter with non-project scope' do
let(:params) { { environment: 'production' } }
it 'raises an error' do
expect { subject }.to raise_error(
described_class::InefficientQueryError,
'`environment` filter must be combined with `project` scope.')
end
end
end
describe "#execute" do
context 'when project or group is missing' do
let(:params) { {} }
......@@ -20,13 +72,24 @@ RSpec.describe DeploymentsFinder do
describe 'filtering' do
context 'when updated_at filters are specified' do
let(:params) { { **base_params, updated_before: 1.day.ago, updated_after: 3.days.ago } }
let!(:deployment_1) { create(:deployment, :success, project: project, updated_at: 2.days.ago) }
let!(:deployment_2) { create(:deployment, :success, project: project, updated_at: 4.days.ago) }
let!(:deployment_3) { create(:deployment, :success, project: project, updated_at: 1.hour.ago) }
let_it_be(:deployment_1) { create(:deployment, :success, project: project, updated_at: 48.hours.ago) }
let_it_be(:deployment_2) { create(:deployment, :success, project: project, updated_at: 47.hours.ago) }
let_it_be(:deployment_3) { create(:deployment, :success, project: project, updated_at: 4.days.ago) }
let_it_be(:deployment_4) { create(:deployment, :success, project: project, updated_at: 1.hour.ago) }
let(:params) { { **base_params, updated_before: 1.day.ago, updated_after: 3.days.ago, order_by: :updated_at } }
it 'returns deployments with matched updated_at' do
is_expected.to match_array([deployment_1])
is_expected.to match_array([deployment_2, deployment_1])
end
context 'when deployments_finder_implicitly_enforce_ordering_for_updated_at_filter feature flag is disabled' do
before do
stub_feature_flags(deployments_finder_implicitly_enforce_ordering_for_updated_at_filter: false)
end
it 'returns deployments with matched updated_at' do
is_expected.to match_array([deployment_1, deployment_2])
end
end
end
......@@ -72,30 +135,34 @@ RSpec.describe DeploymentsFinder do
let(:params) { { **base_params, order_by: order_by, sort: sort } }
let!(:deployment_1) { create(:deployment, :success, project: project, iid: 11, ref: 'master', created_at: 2.days.ago, updated_at: Time.now, finished_at: Time.now) }
let!(:deployment_2) { create(:deployment, :success, project: project, iid: 12, ref: 'feature', created_at: 1.day.ago, updated_at: 2.hours.ago, finished_at: 2.hours.ago) }
let!(:deployment_3) { create(:deployment, :success, project: project, iid: 8, ref: 'video', created_at: Time.now, updated_at: 1.hour.ago, finished_at: 1.hour.ago) }
let!(:deployment_1) { create(:deployment, :success, project: project, ref: 'master', created_at: 2.days.ago, updated_at: Time.now, finished_at: Time.now) }
let!(:deployment_2) { create(:deployment, :success, project: project, ref: 'feature', created_at: 1.day.ago, updated_at: 2.hours.ago, finished_at: 2.hours.ago) }
let!(:deployment_3) { create(:deployment, :success, project: project, ref: 'video', created_at: Time.now, updated_at: 1.hour.ago, finished_at: 1.hour.ago) }
where(:order_by, :sort, :ordered_deployments) do
'created_at' | 'asc' | [:deployment_1, :deployment_2, :deployment_3]
'created_at' | 'desc' | [:deployment_3, :deployment_2, :deployment_1]
'id' | 'asc' | [:deployment_1, :deployment_2, :deployment_3]
'id' | 'desc' | [:deployment_3, :deployment_2, :deployment_1]
'iid' | 'asc' | [:deployment_3, :deployment_1, :deployment_2]
'iid' | 'desc' | [:deployment_2, :deployment_1, :deployment_3]
'iid' | 'asc' | [:deployment_1, :deployment_2, :deployment_3]
'iid' | 'desc' | [:deployment_3, :deployment_2, :deployment_1]
'ref' | 'asc' | [:deployment_2, :deployment_1, :deployment_3]
'ref' | 'desc' | [:deployment_3, :deployment_1, :deployment_2]
'updated_at' | 'asc' | [:deployment_2, :deployment_3, :deployment_1]
'updated_at' | 'desc' | [:deployment_1, :deployment_3, :deployment_2]
'finished_at' | 'asc' | [:deployment_2, :deployment_3, :deployment_1]
'finished_at' | 'desc' | [:deployment_1, :deployment_3, :deployment_2]
'updated_at' | 'asc' | described_class::InefficientQueryError
'updated_at' | 'desc' | described_class::InefficientQueryError
'finished_at' | 'asc' | described_class::InefficientQueryError
'finished_at' | 'desc' | described_class::InefficientQueryError
'invalid' | 'asc' | [:deployment_1, :deployment_2, :deployment_3]
'iid' | 'err' | [:deployment_3, :deployment_1, :deployment_2]
'iid' | 'err' | [:deployment_1, :deployment_2, :deployment_3]
end
with_them do
it 'returns the deployments ordered' do
expect(subject).to eq(ordered_deployments.map { |name| public_send(name) })
if ordered_deployments == described_class::InefficientQueryError
expect { subject }.to raise_error(described_class::InefficientQueryError)
else
expect(subject).to eq(ordered_deployments.map { |name| public_send(name) })
end
end
end
end
......@@ -112,8 +179,20 @@ RSpec.describe DeploymentsFinder do
end
end
describe 'tie-breaker for `finished_at` sorting' do
let(:params) { { **base_params, order_by: 'updated_at', sort: 'asc' } }
describe 'transform `iid` sorting to `id` sorting' do
let(:params) { { **base_params, order_by: 'iid', sort: 'asc' } }
it 'sorts by only one column' do
expect(subject.order_values.size).to eq(1)
end
it 'sorts by `id`' do
expect(subject.order_values.first.to_sql).to eq(Deployment.arel_table[:id].asc.to_sql)
end
end
describe 'tie-breaker for `updated_at` sorting' do
let(:params) { { **base_params, updated_after: 1.day.ago, order_by: 'updated_at', sort: 'asc' } }
it 'sorts by two columns' do
expect(subject.order_values.size).to eq(2)
......@@ -136,25 +215,56 @@ RSpec.describe DeploymentsFinder do
end
end
describe 'enforce sorting to `updated_at` sorting' do
let(:params) { { **base_params, updated_before: 1.day.ago, order_by: 'id', sort: 'asc' } }
before do
allow(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
end
it 'sorts by only one column' do
expect(subject.order_values.size).to eq(2)
end
it 'sorts by `updated_at`' do
expect(subject.order_values.first.to_sql).to eq(Deployment.arel_table[:updated_at].asc.to_sql)
expect(subject.order_values.second.to_sql).to eq(Deployment.arel_table[:id].desc.to_sql)
end
context 'when deployments_finder_implicitly_enforce_ordering_for_updated_at_filter feature flag is disabled' do
before do
stub_feature_flags(deployments_finder_implicitly_enforce_ordering_for_updated_at_filter: false)
end
it 'sorts by only one column' do
expect(subject.order_values.size).to eq(1)
end
it 'sorts by `id`' do
expect(subject.order_values.first.to_sql).to eq(Deployment.arel_table[:id].asc.to_sql)
end
end
end
context 'when filtering by finished time' do
let!(:deployment_1) { create(:deployment, :success, project: project, finished_at: 2.days.ago) }
let!(:deployment_2) { create(:deployment, :success, project: project, finished_at: 4.days.ago) }
let!(:deployment_3) { create(:deployment, :success, project: project, finished_at: 5.hours.ago) }
context 'when filtering by finished_after and finished_before' do
let(:params) { { **base_params, finished_after: 3.days.ago, finished_before: 1.day.ago } }
let(:params) { { **base_params, finished_after: 3.days.ago, finished_before: 1.day.ago, status: :success, order_by: :finished_at } }
it { is_expected.to match_array([deployment_1]) }
end
context 'when the finished_before parameter is missing' do
let(:params) { { **base_params, finished_after: 3.days.ago } }
let(:params) { { **base_params, finished_after: 3.days.ago, status: :success, order_by: :finished_at } }
it { is_expected.to match_array([deployment_1, deployment_3]) }
end
context 'when finished_after is missing' do
let(:params) { { **base_params, finished_before: 3.days.ago } }
let(:params) { { **base_params, finished_before: 3.days.ago, status: :success, order_by: :finished_at } }
it { is_expected.to match_array([deployment_2]) }
end
......@@ -188,10 +298,6 @@ RSpec.describe DeploymentsFinder do
'iid' | 'desc'
'ref' | 'asc'
'ref' | 'desc'
'updated_at' | 'asc'
'updated_at' | 'desc'
'finished_at' | 'asc'
'finished_at' | 'desc'
'invalid' | 'asc'
'iid' | 'err'
end
......
......@@ -72,6 +72,35 @@ RSpec.describe Deployment do
end
end
describe '.for_environment_name' do
subject { described_class.for_environment_name(project, environment_name) }
let_it_be(:project) { create(:project, :repository) }
let_it_be(:production) { create(:environment, :production, project: project) }
let_it_be(:staging) { create(:environment, :staging, project: project) }
let_it_be(:other_project) { create(:project, :repository) }
let_it_be(:other_production) { create(:environment, :production, project: other_project) }
let(:environment_name) { production.name }
context 'when deployment belongs to the environment' do
let!(:deployment) { create(:deployment, project: project, environment: production) }
it { is_expected.to eq([deployment]) }
end
context 'when deployment belongs to the same project but different environment name' do
let!(:deployment) { create(:deployment, project: project, environment: staging) }
it { is_expected.to be_empty }
end
context 'when deployment belongs to the same environment name but different project' do
let!(:deployment) { create(:deployment, project: other_project, environment: other_production) }
it { is_expected.to be_empty }
end
end
describe '.success' do
subject { described_class.success }
......
......@@ -12,9 +12,11 @@ RSpec.describe API::Deployments do
describe 'GET /projects/:id/deployments' do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:deployment_1) { create(:deployment, :success, project: project, iid: 11, ref: 'master', created_at: Time.now, updated_at: Time.now) }
let_it_be(:deployment_2) { create(:deployment, :success, project: project, iid: 12, ref: 'master', created_at: 1.day.ago, updated_at: 2.hours.ago) }
let_it_be(:deployment_3) { create(:deployment, :success, project: project, iid: 8, ref: 'master', created_at: 2.days.ago, updated_at: 1.hour.ago) }
let_it_be(:production) { create(:environment, :production, project: project) }
let_it_be(:staging) { create(:environment, :staging, project: project) }
let_it_be(:deployment_1) { create(:deployment, :success, project: project, environment: production, ref: 'master', created_at: Time.now, updated_at: Time.now) }
let_it_be(:deployment_2) { create(:deployment, :success, project: project, environment: staging, ref: 'master', created_at: 1.day.ago, updated_at: 2.hours.ago) }
let_it_be(:deployment_3) { create(:deployment, :success, project: project, environment: staging, ref: 'master', created_at: 2.days.ago, updated_at: 1.hour.ago) }
def perform_request(params = {})
get api("/projects/#{project.id}/deployments", user), params: params
......@@ -36,17 +38,26 @@ RSpec.describe API::Deployments do
context 'with updated_at filters specified' do
it 'returns projects deployments with last update in specified datetime range' do
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago })
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago, order_by: :updated_at })
expect(response).to have_gitlab_http_status(:ok)
expect(response).to include_pagination_headers
expect(json_response.first['id']).to eq(deployment_3.id)
end
context 'when forbidden order_by is specified' do
it 'returns projects deployments with last update in specified datetime range' do
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago, order_by: :id })
expect(response).to have_gitlab_http_status(:bad_request)
expect(json_response['message']).to include('`updated_at` filter and `updated_at` sorting must be paired')
end
end
end
context 'with the environment filter specifed' do
it 'returns deployments for the environment' do
perform_request({ environment: deployment_1.environment.name })
perform_request({ environment: production.name })
expect(json_response.size).to eq(1)
expect(json_response.first['iid']).to eq(deployment_1.iid)
......@@ -68,7 +79,7 @@ RSpec.describe API::Deployments do
end
it 'returns ordered deployments' do
expect(json_response.map { |i| i['id'] }).to eq([deployment_2.id, deployment_1.id, deployment_3.id])
expect(json_response.map { |i| i['id'] }).to eq([deployment_3.id, deployment_2.id, deployment_1.id])
end
context 'with invalid order_by' do
......@@ -475,7 +486,7 @@ RSpec.describe API::Deployments do
let(:project) { create(:project, :repository) }
let!(:deployment) { create(:deployment, :success, project: project) }
subject { get api("/projects/#{project.id}/deployments?order_by=updated_at&sort=asc", user) }
subject { get api("/projects/#{project.id}/deployments?order_by=id&sort=asc", user) }
it 'succeeds', :aggregate_failures do
subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment