Merge branch 'improve-jobs-queuing-time-metric' into 'master'

Improve jobs queuing time metric (fixed version)

Closes #45205

See merge request gitlab-org/gitlab-ce!18283
parents 019e881d 067f5f11
...@@ -87,7 +87,7 @@ class CommitStatus < ActiveRecord::Base ...@@ -87,7 +87,7 @@ class CommitStatus < ActiveRecord::Base
transition [:created, :pending, :running, :manual] => :canceled transition [:created, :pending, :running, :manual] => :canceled
end end
before_transition created: [:pending, :running] do |commit_status| before_transition [:created, :skipped, :manual] => :pending do |commit_status|
commit_status.queued_at = Time.now commit_status.queued_at = Time.now
end end
......
...@@ -4,6 +4,9 @@ module Ci ...@@ -4,6 +4,9 @@ module Ci
class RegisterJobService class RegisterJobService
attr_reader :runner attr_reader :runner
JOB_QUEUE_DURATION_SECONDS_BUCKETS = [1, 3, 10, 30].freeze
JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET = 5.freeze
Result = Struct.new(:build, :valid?) Result = Struct.new(:build, :valid?)
def initialize(runner) def initialize(runner)
...@@ -104,10 +107,22 @@ module Ci ...@@ -104,10 +107,22 @@ module Ci
end end
def register_success(job) def register_success(job)
job_queue_duration_seconds.observe({ shared_runner: @runner.shared? }, Time.now - job.created_at) labels = { shared_runner: runner.shared?,
jobs_running_for_project: jobs_running_for_project(job) }
job_queue_duration_seconds.observe(labels, Time.now - job.queued_at) unless job.queued_at.nil?
attempt_counter.increment attempt_counter.increment
end end
def jobs_running_for_project(job)
return '+Inf' unless runner.shared?
# excluding currently started job
running_jobs_count = job.project.builds.running.where(runner: Ci::Runner.shared)
.limit(JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET + 1).count - 1
running_jobs_count < JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET ? running_jobs_count : "#{JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET}+"
end
def failed_attempt_counter def failed_attempt_counter
@failed_attempt_counter ||= Gitlab::Metrics.counter(:job_register_attempts_failed_total, "Counts the times a runner tries to register a job") @failed_attempt_counter ||= Gitlab::Metrics.counter(:job_register_attempts_failed_total, "Counts the times a runner tries to register a job")
end end
...@@ -117,7 +132,7 @@ module Ci ...@@ -117,7 +132,7 @@ module Ci
end end
def job_queue_duration_seconds def job_queue_duration_seconds
@job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time') @job_queue_duration_seconds ||= Gitlab::Metrics.histogram(:job_queue_duration_seconds, 'Request handling execution time', {}, JOB_QUEUE_DURATION_SECONDS_BUCKETS)
end end
end end
end end
---
title: Partition job_queue_duration_seconds with jobs_running_for_project
merge_request: 17730
author:
type: changed
...@@ -62,6 +62,7 @@ FactoryBot.define do ...@@ -62,6 +62,7 @@ FactoryBot.define do
end end
trait :pending do trait :pending do
queued_at 'Di 29. Okt 09:50:59 CET 2013'
status 'pending' status 'pending'
end end
......
...@@ -533,4 +533,36 @@ describe CommitStatus do ...@@ -533,4 +533,36 @@ describe CommitStatus do
end end
end end
end end
describe '#enqueue' do
let!(:current_time) { Time.new(2018, 4, 5, 14, 0, 0) }
before do
allow(Time).to receive(:now).and_return(current_time)
end
shared_examples 'commit status enqueued' do
it 'sets queued_at value when enqueued' do
expect { commit_status.enqueue }.to change { commit_status.reload.queued_at }.from(nil).to(current_time)
end
end
context 'when initial state is :created' do
let(:commit_status) { create(:commit_status, :created) }
it_behaves_like 'commit status enqueued'
end
context 'when initial state is :skipped' do
let(:commit_status) { create(:commit_status, :skipped) }
it_behaves_like 'commit status enqueued'
end
context 'when initial state is :manual' do
let(:commit_status) { create(:commit_status, :manual) }
it_behaves_like 'commit status enqueued'
end
end
end end
...@@ -370,10 +370,111 @@ module Ci ...@@ -370,10 +370,111 @@ module Ci
it_behaves_like 'validation is not active' it_behaves_like 'validation is not active'
end end
end end
end
describe '#register_success' do
let!(:current_time) { Time.new(2018, 4, 5, 14, 0, 0) }
let!(:attempt_counter) { double('Gitlab::Metrics::NullMetric') }
let!(:job_queue_duration_seconds) { double('Gitlab::Metrics::NullMetric') }
before do
allow(Time).to receive(:now).and_return(current_time)
# Stub defaults for any metrics other than the ones we're testing
allow(Gitlab::Metrics).to receive(:counter)
.with(any_args)
.and_return(Gitlab::Metrics::NullMetric.instance)
allow(Gitlab::Metrics).to receive(:histogram)
.with(any_args)
.and_return(Gitlab::Metrics::NullMetric.instance)
# Stub tested metrics
allow(Gitlab::Metrics).to receive(:counter)
.with(:job_register_attempts_total, anything)
.and_return(attempt_counter)
allow(Gitlab::Metrics).to receive(:histogram)
.with(:job_queue_duration_seconds, anything, anything, anything)
.and_return(job_queue_duration_seconds)
project.update(shared_runners_enabled: true)
pending_job.update(created_at: current_time - 3600, queued_at: current_time - 1800)
end
shared_examples 'attempt counter collector' do
it 'increments attempt counter' do
allow(job_queue_duration_seconds).to receive(:observe)
expect(attempt_counter).to receive(:increment)
execute(runner)
end
end
shared_examples 'jobs queueing time histogram collector' do
it 'counts job queuing time histogram with expected labels' do
allow(attempt_counter).to receive(:increment)
expect(job_queue_duration_seconds).to receive(:observe)
.with({ shared_runner: expected_shared_runner,
jobs_running_for_project: expected_jobs_running_for_project_first_job }, 1800)
execute(runner)
end
context 'when project already has running jobs' do
let!(:build2) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
let!(:build3) { create( :ci_build, :running, pipeline: pipeline, runner: shared_runner) }
it 'counts job queuing time histogram with expected labels' do
allow(attempt_counter).to receive(:increment)
expect(job_queue_duration_seconds).to receive(:observe)
.with({ shared_runner: expected_shared_runner,
jobs_running_for_project: expected_jobs_running_for_project_third_job }, 1800)
execute(runner)
end
end
end
shared_examples 'metrics collector' do
it_behaves_like 'attempt counter collector'
it_behaves_like 'jobs queueing time histogram collector'
end
context 'when shared runner is used' do
let(:runner) { shared_runner }
let(:expected_shared_runner) { true }
let(:expected_jobs_running_for_project_first_job) { 0 }
let(:expected_jobs_running_for_project_third_job) { 2 }
it_behaves_like 'metrics collector'
context 'when pending job with queued_at=nil is used' do
before do
pending_job.update(queued_at: nil)
end
it_behaves_like 'attempt counter collector'
it "doesn't count job queuing time histogram" do
allow(attempt_counter).to receive(:increment)
expect(job_queue_duration_seconds).not_to receive(:observe)
execute(runner)
end
end
end
context 'when specific runner is used' do
let(:runner) { specific_runner }
let(:expected_shared_runner) { false }
let(:expected_jobs_running_for_project_first_job) { '+Inf' }
let(:expected_jobs_running_for_project_third_job) { '+Inf' }
it_behaves_like 'metrics collector'
end
end
def execute(runner) def execute(runner)
described_class.new(runner).execute.build described_class.new(runner).execute.build
end end
end end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment