Commit a5f61827 authored by Kamil Trzciński's avatar Kamil Trzciński

Optimise build queue service

This makes BuildQueueService to force refresh runners
that are considered to have recent queue.

Such runners are the ones that connected within online
interval + time to expire runner cache.
parent 9174d60b
...@@ -23,13 +23,17 @@ module Ci ...@@ -23,13 +23,17 @@ module Ci
project_type: 3 project_type: 3
} }
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour ONLINE_CONTACT_TIMEOUT = 1.hour
UPDATE_DB_RUNNER_INFO_EVERY = 40.minutes RUNNER_QUEUE_EXPIRY_TIME = 1.hour
# This needs to be less than `ONLINE_CONTACT_TIMEOUT`
UPDATE_CONTACT_COLUMN_EVERY = (40.minutes..55.minutes).freeze
AVAILABLE_TYPES_LEGACY = %w[specific shared].freeze AVAILABLE_TYPES_LEGACY = %w[specific shared].freeze
AVAILABLE_TYPES = runner_types.keys.freeze AVAILABLE_TYPES = runner_types.keys.freeze
AVAILABLE_STATUSES = %w[active paused online offline].freeze AVAILABLE_STATUSES = %w[active paused online offline].freeze
AVAILABLE_SCOPES = (AVAILABLE_TYPES_LEGACY + AVAILABLE_TYPES + AVAILABLE_STATUSES).freeze AVAILABLE_SCOPES = (AVAILABLE_TYPES_LEGACY + AVAILABLE_TYPES + AVAILABLE_STATUSES).freeze
FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level maximum_timeout_human_readable].freeze FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level maximum_timeout_human_readable].freeze
ignore_column :is_shared ignore_column :is_shared
...@@ -46,7 +50,7 @@ module Ci ...@@ -46,7 +50,7 @@ module Ci
scope :active, -> { where(active: true) } scope :active, -> { where(active: true) }
scope :paused, -> { where(active: false) } scope :paused, -> { where(active: false) }
scope :online, -> { where('contacted_at > ?', contact_time_deadline) } scope :online, -> { where('contacted_at > ?', online_contact_time_deadline) }
# The following query using negation is cheaper than using `contacted_at <= ?` # The following query using negation is cheaper than using `contacted_at <= ?`
# because there are less runners online than have been created. The # because there are less runners online than have been created. The
# resulting query is quickly finding online ones and then uses the regular # resulting query is quickly finding online ones and then uses the regular
...@@ -56,6 +60,8 @@ module Ci ...@@ -56,6 +60,8 @@ module Ci
scope :offline, -> { where.not(id: online) } scope :offline, -> { where.not(id: online) }
scope :ordered, -> { order(id: :desc) } scope :ordered, -> { order(id: :desc) }
scope :with_recent_runner_queue, -> { where('contacted_at > ?', recent_queue_deadline) }
# BACKWARD COMPATIBILITY: There are needed to maintain compatibility with `AVAILABLE_SCOPES` used by `lib/api/runners.rb` # BACKWARD COMPATIBILITY: There are needed to maintain compatibility with `AVAILABLE_SCOPES` used by `lib/api/runners.rb`
scope :deprecated_shared, -> { instance_type } scope :deprecated_shared, -> { instance_type }
scope :deprecated_specific, -> { project_type.or(group_type) } scope :deprecated_specific, -> { project_type.or(group_type) }
...@@ -137,10 +143,18 @@ module Ci ...@@ -137,10 +143,18 @@ module Ci
fuzzy_search(query, [:token, :description]) fuzzy_search(query, [:token, :description])
end end
def self.contact_time_deadline def self.online_contact_time_deadline
ONLINE_CONTACT_TIMEOUT.ago ONLINE_CONTACT_TIMEOUT.ago
end end
def self.recent_queue_deadline
# we add queue expiry + online
# - contacted_at can be updated at any time within this interval
# we have always accurate `contacted_at` but it is stored in Redis
# and not persisted in database
(ONLINE_CONTACT_TIMEOUT + RUNNER_QUEUE_EXPIRY_TIME).ago
end
def self.order_by(order) def self.order_by(order)
if order == 'contacted_asc' if order == 'contacted_asc'
order_contacted_at_asc order_contacted_at_asc
...@@ -174,7 +188,7 @@ module Ci ...@@ -174,7 +188,7 @@ module Ci
end end
def online? def online?
contacted_at && contacted_at > self.class.contact_time_deadline contacted_at && contacted_at > self.class.online_contact_time_deadline
end end
def status def status
...@@ -275,9 +289,7 @@ module Ci ...@@ -275,9 +289,7 @@ module Ci
def persist_cached_data? def persist_cached_data?
# Use a random threshold to prevent beating DB updates. # Use a random threshold to prevent beating DB updates.
# It generates a distribution between [40m, 80m]. contacted_at_max_age = Random.rand(UPDATE_CONTACT_COLUMN_EVERY)
contacted_at_max_age = UPDATE_DB_RUNNER_INFO_EVERY + Random.rand(UPDATE_DB_RUNNER_INFO_EVERY)
real_contacted_at = read_attribute(:contacted_at) real_contacted_at = read_attribute(:contacted_at)
real_contacted_at.nil? || real_contacted_at.nil? ||
......
...@@ -9,6 +9,10 @@ module Ci ...@@ -9,6 +9,10 @@ module Ci
private private
def tick_for(build, runners) def tick_for(build, runners)
if Feature.enabled?(:ci_update_queues_for_online_runners, build.project, default_enabled: true)
runners = runners.with_recent_runner_queue
end
runners.each do |runner| runners.each do |runner|
runner.pick_build!(build) runner.pick_build!(build)
end end
......
---
title: Optimise UpdateBuildQueueService
merge_request: 32095
author:
type: performance
...@@ -80,6 +80,13 @@ describe Ci::Runner do ...@@ -80,6 +80,13 @@ describe Ci::Runner do
end end
end end
describe 'constraints' do
it '.UPDATE_CONTACT_COLUMN_EVERY' do
expect(described_class::UPDATE_CONTACT_COLUMN_EVERY.max)
.to be <= described_class::ONLINE_CONTACT_TIMEOUT
end
end
describe '#access_level' do describe '#access_level' do
context 'when creating new runner and access_level is nil' do context 'when creating new runner and access_level is nil' do
let(:runner) do let(:runner) do
......
...@@ -7,84 +7,108 @@ describe Ci::UpdateBuildQueueService do ...@@ -7,84 +7,108 @@ describe Ci::UpdateBuildQueueService do
let(:build) { create(:ci_build, pipeline: pipeline) } let(:build) { create(:ci_build, pipeline: pipeline) }
let(:pipeline) { create(:ci_pipeline, project: project) } let(:pipeline) { create(:ci_pipeline, project: project) }
context 'when updating specific runners' do shared_examples 'refreshes runner' do
let(:runner) { create(:ci_runner, :project, projects: [project]) }
context 'when there is a runner that can pick build' do
it 'ticks runner queue value' do it 'ticks runner queue value' do
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value } expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
end end
end end
context 'when there is no runner that can pick build' do shared_examples 'does not refresh runner' do
let(:another_project) { create(:project) } it 'ticks runner queue value' do
let(:runner) { create(:ci_runner, :project, projects: [another_project]) }
it 'does not tick runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end end
end end
end
context 'when updating shared runners' do shared_examples 'matching build' do
let(:runner) { create(:ci_runner, :instance) } context 'when there is a online runner that can pick build' do
before do
runner.update!(contacted_at: 30.minutes.ago)
end
context 'when there is no runner that can pick build' do it_behaves_like 'refreshes runner'
it 'ticks runner queue value' do
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
end end
end end
shared_examples 'mismatching tags' do
context 'when there is no runner that can pick build due to tag mismatch' do context 'when there is no runner that can pick build due to tag mismatch' do
before do before do
build.tag_list = [:docker] build.tag_list = [:docker]
end end
it 'does not tick runner queue value' do it_behaves_like 'does not refresh runner'
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end end
end end
context 'when there is no runner that can pick build due to being disabled on project' do shared_examples 'recent runner queue' do
context 'when there is runner with expired cache' do
before do before do
build.project.shared_runners_enabled = false runner.update!(contacted_at: Ci::Runner.recent_queue_deadline)
end end
it 'does not tick runner queue value' do context 'when ci_update_queues_for_online_runners is enabled' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } before do
stub_feature_flags(ci_update_queues_for_online_runners: true)
end end
it_behaves_like 'does not refresh runner'
end end
context 'when ci_update_queues_for_online_runners is disabled' do
before do
stub_feature_flags(ci_update_queues_for_online_runners: false)
end end
context 'when updating group runners' do it_behaves_like 'refreshes runner'
let(:group) { create(:group) } end
let(:project) { create(:project, group: group) } end
let(:runner) { create(:ci_runner, :group, groups: [group]) } end
context 'when there is a runner that can pick build' do context 'when updating specific runners' do
it 'ticks runner queue value' do let(:runner) { create(:ci_runner, :project, projects: [project]) }
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when the runner is assigned to another project' do
let(:another_project) { create(:project) }
let(:runner) { create(:ci_runner, :project, projects: [another_project]) }
it_behaves_like 'does not refresh runner'
end end
end end
context 'when there is no runner that can pick build due to tag mismatch' do context 'when updating shared runners' do
let(:runner) { create(:ci_runner, :instance) }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when there is no runner that can pick build due to being disabled on project' do
before do before do
build.tag_list = [:docker] build.project.shared_runners_enabled = false
end end
it 'does not tick runner queue value' do it_behaves_like 'does not refresh runner'
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end end
end end
context 'when updating group runners' do
let(:group) { create(:group) }
let(:project) { create(:project, group: group) }
let(:runner) { create(:ci_runner, :group, groups: [group]) }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when there is no runner that can pick build due to being disabled on project' do context 'when there is no runner that can pick build due to being disabled on project' do
before do before do
build.project.group_runners_enabled = false build.project.group_runners_enabled = false
end end
it 'does not tick runner queue value' do it_behaves_like 'does not refresh runner'
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment