Commit 353e0ada authored by Stan Hu's avatar Stan Hu

Merge branch 'simplify-health-checks' into 'master'

Refactor the usage of `HealthChecks`

See merge request gitlab-org/gitlab!17953
parents c7dd5259 ee4d6554
...@@ -4,19 +4,15 @@ class HealthController < ActionController::Base ...@@ -4,19 +4,15 @@ class HealthController < ActionController::Base
protect_from_forgery with: :exception, prepend: true protect_from_forgery with: :exception, prepend: true
include RequiresWhitelistedMonitoringClient include RequiresWhitelistedMonitoringClient
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
def readiness def readiness
results = CHECKS.map { |check| [check.name, check.readiness] } results = checks.flat_map(&:readiness)
success = results.all?(&:success)
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless success
render_check_results(results) response = results.map { |result| [result.name, result.payload] }.to_h
render json: response, status: success ? :ok : :service_unavailable
end end
def liveness def liveness
...@@ -25,26 +21,7 @@ class HealthController < ActionController::Base ...@@ -25,26 +21,7 @@ class HealthController < ActionController::Base
private private
def render_check_results(results) def checks
flattened = results.flat_map do |name, result| ::Gitlab::HealthChecks::CHECKS
if result.is_a?(Gitlab::HealthChecks::Result)
[[name, result]]
else
result.map { |r| [name, r] }
end
end
success = flattened.all? { |name, r| r.success }
response = flattened.map do |name, r|
info = { status: r.success ? 'ok' : 'failed' }
info['message'] = r.message if r.message
info[:labels] = r.labels if r.labels
[name, info]
end
# disable static error pages at the gitlab-workhorse level, we want to see this error response even in production
headers["X-GitLab-Custom-Error"] = 1 unless success
render json: response.to_h, status: success ? :ok : :service_unavailable
end end
end end
...@@ -101,6 +101,6 @@ describe Geo::RepositorySyncWorker, :geo, :clean_gitlab_redis_cache do ...@@ -101,6 +101,6 @@ describe Geo::RepositorySyncWorker, :geo, :clean_gitlab_redis_cache do
end end
def result(success, shard) def result(success, shard)
Gitlab::HealthChecks::Result.new(success, nil, { shard: shard }) Gitlab::HealthChecks::Result.new('gitaly_check', success, nil, { shard: shard })
end end
end end
...@@ -80,6 +80,6 @@ describe Geo::RepositoryVerification::Primary::BatchWorker, :clean_gitlab_redis_ ...@@ -80,6 +80,6 @@ describe Geo::RepositoryVerification::Primary::BatchWorker, :clean_gitlab_redis_
end end
def result(success, shard) def result(success, shard)
Gitlab::HealthChecks::Result.new(success, nil, { shard: shard }) Gitlab::HealthChecks::Result.new('gitaly_check', success, nil, { shard: shard })
end end
end end
...@@ -82,6 +82,6 @@ describe Geo::RepositoryVerification::Secondary::SchedulerWorker, :clean_gitlab_ ...@@ -82,6 +82,6 @@ describe Geo::RepositoryVerification::Secondary::SchedulerWorker, :clean_gitlab_
end end
def result(success, shard) def result(success, shard)
Gitlab::HealthChecks::Result.new(success, nil, { shard: shard }) Gitlab::HealthChecks::Result.new('gitaly_check', success, nil, { shard: shard })
end end
end end
...@@ -26,9 +26,9 @@ describe Geo::Scheduler::PerShardSchedulerWorker do ...@@ -26,9 +26,9 @@ describe Geo::Scheduler::PerShardSchedulerWorker do
let(:other_shard_name) { 'other' } let(:other_shard_name) { 'other' }
let(:unhealthy_shard_name) { 'unhealthy' } let(:unhealthy_shard_name) { 'unhealthy' }
let(:default_shard) { Gitlab::HealthChecks::Result.new(true, nil, shard: default_shard_name) } let(:default_shard) { Gitlab::HealthChecks::Result.new('gitaly_check', true, nil, shard: default_shard_name) }
let(:other_shard) { Gitlab::HealthChecks::Result.new(true, nil, shard: other_shard_name) } let(:other_shard) { Gitlab::HealthChecks::Result.new('gitaly_check', true, nil, shard: other_shard_name) }
let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new(false, '14:Connect Failed', shard: unhealthy_shard_name) } let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new('gitaly_check', false, '14:Connect Failed', shard: unhealthy_shard_name) }
before do before do
allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, other_shard, unhealthy_shard]) allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, other_shard, unhealthy_shard])
......
# frozen_string_literal: true
module Gitlab
module HealthChecks
CHECKS = [
Gitlab::HealthChecks::DbCheck,
Gitlab::HealthChecks::Redis::RedisCheck,
Gitlab::HealthChecks::Redis::CacheCheck,
Gitlab::HealthChecks::Redis::QueuesCheck,
Gitlab::HealthChecks::Redis::SharedStateCheck,
Gitlab::HealthChecks::GitalyCheck
].freeze
end
end
...@@ -29,7 +29,13 @@ module Gitlab ...@@ -29,7 +29,13 @@ module Gitlab
def check(storage_name) def check(storage_name)
serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name) serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name)
result = serv.check result = serv.check
HealthChecks::Result.new(result[:success], result[:message], shard: storage_name)
HealthChecks::Result.new(
name,
result[:success],
result[:message],
shard: storage_name
)
end end
private private
......
# frozen_string_literal: true # frozen_string_literal: true
module Gitlab::HealthChecks module Gitlab
module HealthChecks
Metric = Struct.new(:name, :value, :labels) Metric = Struct.new(:name, :value, :labels)
end
end end
# frozen_string_literal: true
module Gitlab
module HealthChecks
class PrometheusTextFormat
def marshal(metrics)
"#{metrics_with_type_declarations(metrics).join("\n")}\n"
end
private
def metrics_with_type_declarations(metrics)
type_declaration_added = {}
metrics.flat_map do |metric|
metric_lines = []
unless type_declaration_added.key?(metric.name)
type_declaration_added[metric.name] = true
metric_lines << metric_type_declaration(metric)
end
metric_lines << metric_text(metric)
end
end
def metric_type_declaration(metric)
"# TYPE #{metric.name} gauge"
end
def metric_text(metric)
labels = metric.labels&.map { |key, value| "#{key}=\"#{value}\"" }&.join(',') || ''
if labels.empty?
"#{metric.name} #{metric.value}"
else
"#{metric.name}{#{labels}} #{metric.value}"
end
end
end
end
end
# frozen_string_literal: true # frozen_string_literal: true
module Gitlab::HealthChecks module Gitlab
Result = Struct.new(:success, :message, :labels) module HealthChecks
Result = Struct.new(:name, :success, :message, :labels) do
def payload
{
status: success ? 'ok' : 'failed',
message: message,
labels: labels
}.compact
end
end
end
end end
...@@ -8,14 +8,14 @@ module Gitlab ...@@ -8,14 +8,14 @@ module Gitlab
def readiness def readiness
check_result = check check_result = check
if successful?(check_result) if successful?(check_result)
HealthChecks::Result.new(true) HealthChecks::Result.new(name, true)
elsif check_result.is_a?(Timeout::Error) elsif check_result.is_a?(Timeout::Error)
HealthChecks::Result.new(false, "#{human_name} check timed out") HealthChecks::Result.new(name, false, "#{human_name} check timed out")
else else
HealthChecks::Result.new(false, "unexpected #{human_name} check result: #{check_result}") HealthChecks::Result.new(name, false, "unexpected #{human_name} check result: #{check_result}")
end end
rescue => e rescue => e
HealthChecks::Result.new(false, "unexpected #{human_name} check result: #{e}") HealthChecks::Result.new(name, false, "unexpected #{human_name} check result: #{e}")
end end
def metrics def metrics
......
...@@ -32,7 +32,8 @@ describe HealthController do ...@@ -32,7 +32,8 @@ describe HealthController do
end end
it 'responds with readiness checks data when a failure happens' do it 'responds with readiness checks data when a failure happens' do
allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(Gitlab::HealthChecks::Result.new(false, "check error")) allow(Gitlab::HealthChecks::Redis::RedisCheck).to receive(:readiness).and_return(
Gitlab::HealthChecks::Result.new('redis_check', false, "check error"))
subject subject
......
...@@ -18,13 +18,13 @@ describe Gitlab::HealthChecks::GitalyCheck do ...@@ -18,13 +18,13 @@ describe Gitlab::HealthChecks::GitalyCheck do
context 'Gitaly server is up' do context 'Gitaly server is up' do
let(:gitaly_check) { double(check: { success: true }) } let(:gitaly_check) { double(check: { success: true }) }
it { is_expected.to eq([result_class.new(true, nil, shard: 'default')]) } it { is_expected.to eq([result_class.new('gitaly_check', true, nil, shard: 'default')]) }
end end
context 'Gitaly server is down' do context 'Gitaly server is down' do
let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) } let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) }
it { is_expected.to eq([result_class.new(false, 'Connection refused', shard: 'default')]) } it { is_expected.to eq([result_class.new('gitaly_check', false, 'Connection refused', shard: 'default')]) }
end end
end end
......
describe Gitlab::HealthChecks::PrometheusTextFormat do
let(:metric_class) { Gitlab::HealthChecks::Metric }
subject { described_class.new }
describe '#marshal' do
let(:sample_metrics) do
[metric_class.new('metric1', 1),
metric_class.new('metric2', 2)]
end
it 'marshal to text with non repeating type definition' do
expected = <<-EXPECTED.strip_heredoc
# TYPE metric1 gauge
metric1 1
# TYPE metric2 gauge
metric2 2
EXPECTED
expect(subject.marshal(sample_metrics)).to eq(expected)
end
context 'metrics where name repeats' do
let(:sample_metrics) do
[metric_class.new('metric1', 1),
metric_class.new('metric1', 2),
metric_class.new('metric2', 3)]
end
it 'marshal to text with non repeating type definition' do
expected = <<-EXPECTED.strip_heredoc
# TYPE metric1 gauge
metric1 1
metric1 2
# TYPE metric2 gauge
metric2 3
EXPECTED
expect(subject.marshal(sample_metrics)).to eq(expected)
end
end
end
end
...@@ -30,8 +30,8 @@ describe RepositoryCheck::DispatchWorker do ...@@ -30,8 +30,8 @@ describe RepositoryCheck::DispatchWorker do
context 'with unhealthy shard' do context 'with unhealthy shard' do
let(:default_shard_name) { 'default' } let(:default_shard_name) { 'default' }
let(:unhealthy_shard_name) { 'unhealthy' } let(:unhealthy_shard_name) { 'unhealthy' }
let(:default_shard) { Gitlab::HealthChecks::Result.new(true, nil, shard: default_shard_name) } let(:default_shard) { Gitlab::HealthChecks::Result.new('gitaly_check', true, nil, shard: default_shard_name) }
let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new(false, '14:Connect Failed', shard: unhealthy_shard_name) } let(:unhealthy_shard) { Gitlab::HealthChecks::Result.new('gitaly_check', false, '14:Connect Failed', shard: unhealthy_shard_name) }
before do before do
allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, unhealthy_shard]) allow(Gitlab::HealthChecks::GitalyCheck).to receive(:readiness).and_return([default_shard, unhealthy_shard])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment