Commit f79dfdc2 authored by Robert Speicher

Merge branch '222343-discover-prometheus-from-consul-for-topology-ping' into 'master'

Allow discovering Prometheus from Consul for Topology Ping

Closes #222343

See merge request gitlab-org/gitlab!38060
parents e473ac00 1c5fb083
...@@ -1153,8 +1153,15 @@ production: &base ...@@ -1153,8 +1153,15 @@ production: &base
# yourself, and then update the values here. # yourself, and then update the values here.
# https://docs.gitlab.com/ee/administration/monitoring/prometheus/ # https://docs.gitlab.com/ee/administration/monitoring/prometheus/
prometheus: prometheus:
# Do not use `enable` and `listen_address` in any new code, as they are deprecated. Use `server_address` instead.
# https://gitlab.com/gitlab-org/gitlab/-/issues/227111
# enable: true # enable: true
# listen_address: 'localhost:9090' # listen_address: 'localhost:9090'
# server_address: 'localhost:9090'
## Consul settings
consul:
# api_url: 'http://localhost:8500'
shutdown: shutdown:
# # blackout_seconds: # # blackout_seconds:
......
# frozen_string_literal: true
module Gitlab
  module Consul
    # Minimal client for the Consul HTTP API, used to look up services
    # (currently Prometheus) registered in the Consul catalog.
    #
    # Failures while fetching or parsing a Consul response are re-raised as
    # subclasses of Gitlab::Consul::Internal::Error.
    class Internal
      Error = Class.new(StandardError)
      UnexpectedResponseError = Class.new(Gitlab::Consul::Internal::Error)
      SocketError = Class.new(Gitlab::Consul::Internal::Error)
      SSLError = Class.new(Gitlab::Consul::Internal::Error)
      ECONNREFUSED = Class.new(Gitlab::Consul::Internal::Error)

      class << self
        # Base URL of the Consul HTTP API, read from the `consul.api_url` setting.
        #
        # @return [String, nil] the configured URL, or nil when the setting is
        #   blank or the `consul` section is missing from config/gitlab.yml
        def api_url
          Gitlab.config.consul.api_url.to_s.presence if Gitlab.config.consul
        rescue Settingslogic::MissingSetting
          Gitlab::AppLogger.error('Consul api_url is not present in config/gitlab.yml')

          nil
        end

        # Looks a service up in the Consul catalog.
        #
        # @param service_name [String] name the service is registered under
        # @return [Array(String, Integer), nil] `[address, port]` of the first
        #   catalog entry, or nil when the name/api_url is blank or the catalog
        #   has no entry for the service
        # @raise [Gitlab::Consul::Internal::Error] when the request fails or the
        #   response is not a 200 with a JSON body
        def discover_service(service_name:)
          return unless service_name.present? && api_url

          api_path = URI.join(api_url, '/v1/catalog/service/', URI.encode_www_form_component(service_name)).to_s
          services = json_get(api_path, allow_local_requests: true, open_timeout: 5, read_timeout: 10)

          # Use the first service definition
          service = services&.first

          return unless service

          # Consul reports an empty-string ServiceAddress when the service did not
          # register its own address; the node Address must be used in that case.
          # An empty string is truthy in Ruby, hence `presence` before the fallback.
          service_address = service['ServiceAddress'].presence || service['Address']
          service_port = service['ServicePort']

          [service_address, service_port]
        end

        # Discovers the Prometheus service through Consul and returns the first
        # base URL that answers the Prometheus health check.
        #
        # @return [String, nil] `https://host:port` or `http://host:port`, or nil
        #   when the service is not registered or neither scheme is healthy
        # @raise [Gitlab::Consul::Internal::Error] when the Consul lookup fails
        def discover_prometheus_uri
          service_address, service_port = discover_service(service_name: 'prometheus')

          return unless service_address && service_port

          # Consul does not tell us whether Prometheus is served over TLS, so both
          # schemes are probed. HTTPS goes first because a failed TLS attempt is
          # expected to return quickly. `find` stops at the first healthy URL.
          candidate_urls = %w[https http].map { |scheme| "#{scheme}://#{service_address}:#{service_port}" }
          candidate_urls.find { |connection_url| prometheus_healthy?(connection_url) }
        end

        private

        # Best-effort health probe: any error raised while probing simply means
        # "this URL is not usable", so the next scheme can be tried.
        def prometheus_healthy?(connection_url)
          Gitlab::PrometheusClient.new(connection_url, allow_local_requests: true).healthy?
        rescue StandardError
          false
        end

        # Performs a GET request and returns the parsed JSON body.
        # Raises UnexpectedResponseError unless the response is a 200 with a body.
        def json_get(path, options)
          response = get(path, options)
          code = response.try(:code)
          body = response.try(:body)

          raise Consul::Internal::UnexpectedResponseError unless code == 200 && body

          parse_response_body(body)
        end

        # Parses a JSON body, mapping any parsing failure to UnexpectedResponseError.
        def parse_response_body(body)
          Gitlab::Json.parse(body)
        rescue StandardError
          raise Consul::Internal::UnexpectedResponseError
        end

        # Issues the HTTP request, translating low-level errors into this class'
        # own error hierarchy. Specific errors are listed before the catch-all.
        def get(path, options)
          Gitlab::HTTP.get(path, options)
        rescue ::SocketError
          raise Consul::Internal::SocketError
        rescue OpenSSL::SSL::SSLError
          raise Consul::Internal::SSLError
        rescue Errno::ECONNREFUSED
          raise Consul::Internal::ECONNREFUSED
        rescue StandardError
          raise Consul::Internal::UnexpectedResponseError
        end
      end
    end
  end
end
...@@ -78,10 +78,10 @@ module Gitlab ...@@ -78,10 +78,10 @@ module Gitlab
end end
def with_prometheus_client(fallback: nil) def with_prometheus_client(fallback: nil)
return fallback unless Gitlab::Prometheus::Internal.prometheus_enabled? api_url = prometheus_api_url
return fallback unless api_url
prometheus_address = Gitlab::Prometheus::Internal.uri yield Gitlab::PrometheusClient.new(api_url, allow_local_requests: true)
yield Gitlab::PrometheusClient.new(prometheus_address, allow_local_requests: true)
end end
def measure_duration def measure_duration
...@@ -105,6 +105,14 @@ module Gitlab ...@@ -105,6 +105,14 @@ module Gitlab
private private
# Resolves the Prometheus base URL to query.
#
# The address from the Prometheus settings takes precedence when Prometheus is
# enabled there; otherwise, if a Consul API URL is configured, Prometheus is
# looked up through Consul service discovery.
#
# @return [String, nil] the Prometheus URL, or nil when neither source yields one
def prometheus_api_url
  return Gitlab::Prometheus::Internal.uri if Gitlab::Prometheus::Internal.prometheus_enabled?
  return unless Gitlab::Consul::Internal.api_url

  Gitlab::Consul::Internal.discover_prometheus_uri
end
def redis_usage_counter def redis_usage_counter
yield yield
rescue ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent rescue ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the Consul catalog client: configuration lookup, service discovery,
# Prometheus URL discovery and translation of transport/response failures.
RSpec.describe Gitlab::Consul::Internal do
  let(:api_url) { 'http://127.0.0.1:8500' }

  let(:consul_settings) do
    {
      api_url: api_url
    }
  end

  before do
    stub_config(consul: consul_settings)
  end

  describe '.api_url' do
    it 'returns correct value' do
      expect(described_class.api_url).to eq(api_url)
    end

    context 'when consul setting is not present in gitlab.yml' do
      before do
        allow(Gitlab.config).to receive(:consul).and_raise(Settingslogic::MissingSetting)
      end

      it 'does not fail' do
        expect(described_class.api_url).to be_nil
      end
    end
  end

  shared_examples 'handles failure response' do
    it 'raises Gitlab::Consul::Internal::SocketError when SocketError is rescued' do
      stub_consul_discover_prometheus.to_raise(::SocketError)

      expect { subject }
        .to raise_error(described_class::SocketError)
    end

    it 'raises Gitlab::Consul::Internal::SSLError when OpenSSL::SSL::SSLError is rescued' do
      stub_consul_discover_prometheus.to_raise(OpenSSL::SSL::SSLError)

      expect { subject }
        .to raise_error(described_class::SSLError)
    end

    it 'raises Gitlab::Consul::Internal::ECONNREFUSED when Errno::ECONNREFUSED is rescued' do
      stub_consul_discover_prometheus.to_raise(Errno::ECONNREFUSED)

      expect { subject }
        .to raise_error(described_class::ECONNREFUSED)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when StandardError is rescued' do
      stub_consul_discover_prometheus.to_raise(StandardError)

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when request returns 500' do
      stub_consul_discover_prometheus.to_return(status: 500, body: '{ message: "FAIL!" }')

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when request returns non json data' do
      stub_consul_discover_prometheus.to_return(status: 200, body: 'not json')

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end
  end

  shared_examples 'returns nil given blank value of' do |input_symbol|
    [nil, ''].each do |value|
      # Each value needs its own example group: two `let`s with the same name in
      # one group overwrite each other, so only the last value would be tested.
      context "when #{input_symbol} is #{value.inspect}" do
        let(input_symbol) { value }

        it { is_expected.to be_nil }
      end
    end
  end

  describe '.discover_service' do
    subject { described_class.discover_service(service_name: service_name) }

    let(:service_name) { 'prometheus' }

    it_behaves_like 'returns nil given blank value of', :api_url

    it_behaves_like 'returns nil given blank value of', :service_name

    context 'one service discovered' do
      before do
        stub_consul_discover_prometheus.to_return(status: 200, body: '[{"ServiceAddress":"prom.net","ServicePort":9090}]')
      end

      it 'returns the service address and port' do
        is_expected.to eq(["prom.net", 9090])
      end
    end

    context 'multiple services discovered' do
      before do
        stub_consul_discover_prometheus
          .to_return(status: 200, body: '[{"ServiceAddress":"prom_1.net","ServicePort":9090},{"ServiceAddress":"prom.net","ServicePort":9090}]')
      end

      it 'uses the first service' do
        is_expected.to eq(["prom_1.net", 9090])
      end
    end

    it_behaves_like 'handles failure response'
  end

  describe '.discover_prometheus_uri' do
    subject { described_class.discover_prometheus_uri }

    before do
      stub_consul_discover_prometheus
        .to_return(status: 200, body: '[{"ServiceAddress":"prom.net","ServicePort":9090}]')

      # By default both the https and http health endpoints report healthy.
      stub_request(:get, /\/-\/healthy/)
        .to_return(status: 200, body: Gitlab::PrometheusClient::HEALTHY_RESPONSE)
    end

    context 'both TLS and non-TLS connection are healthy' do
      it 'returns https uri' do
        is_expected.to eq('https://prom.net:9090')
      end
    end

    context 'TLS connection is not healthy' do
      before do
        stub_request(:get, /https:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')
      end

      it 'returns http uri' do
        is_expected.to eq('http://prom.net:9090')
      end
    end

    context 'neither TLS nor non-TLS connection is healthy' do
      before do
        stub_request(:get, /https:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')
        stub_request(:get, /http:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')
      end

      it 'returns nil' do
        is_expected.to be_nil
      end
    end

    it_behaves_like 'returns nil given blank value of', :api_url

    it_behaves_like 'handles failure response'
  end

  # Returns the request stub for the Consul catalog lookup of the Prometheus
  # service, so each example can chain its own `to_return` / `to_raise`.
  def stub_consul_discover_prometheus
    stub_request(:get, /v1\/catalog\/service\/prometheus/)
  end
end
...@@ -61,7 +61,7 @@ RSpec.describe Gitlab::Prometheus::Internal do ...@@ -61,7 +61,7 @@ RSpec.describe Gitlab::Prometheus::Internal do
end end
end end
describe 'prometheus_enabled?' do describe '.prometheus_enabled?' do
it 'returns correct value' do it 'returns correct value' do
expect(described_class.prometheus_enabled?).to eq(true) expect(described_class.prometheus_enabled?).to eq(true)
end end
......
...@@ -13,12 +13,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -13,12 +13,7 @@ RSpec.describe Gitlab::UsageData::Topology do
allow(Process).to receive(:clock_gettime).and_return(0) allow(Process).to receive(:clock_gettime).and_return(0)
end end
context 'when embedded Prometheus server is enabled' do shared_examples 'query topology data from Prometheus' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
context 'tracking node metrics' do context 'tracking node metrics' do
it 'contains node level metrics for each instance' do it 'contains node level metrics for each instance' do
expect_prometheus_api_to( expect_prometheus_api_to(
...@@ -461,9 +456,29 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -461,9 +456,29 @@ RSpec.describe Gitlab::UsageData::Topology do
end end
end end
context 'when embedded Prometheus server is disabled' do context 'when Prometheus is available from Prometheus settings' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
include_examples 'query topology data from Prometheus'
end
context 'when Prometheus is available from Consul service discovery' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return('http://127.0.0.1:8500')
expect(Gitlab::Consul::Internal).to receive(:discover_prometheus_uri).and_return('http://prom.net:9090')
end
include_examples 'query topology data from Prometheus'
end
context 'when Prometheus is not available' do
it 'returns empty result with no failures' do it 'returns empty result with no failures' do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false) expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return(nil)
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
duration_s: 0, duration_s: 0,
......
...@@ -76,20 +76,37 @@ RSpec.describe Gitlab::Utils::UsageData do ...@@ -76,20 +76,37 @@ RSpec.describe Gitlab::Utils::UsageData do
end end
describe '#with_prometheus_client' do describe '#with_prometheus_client' do
context 'when Prometheus is enabled' do shared_examples 'query data from Prometheus' do
it 'yields a client instance and returns the block result' do it 'yields a client instance and returns the block result' do
result = described_class.with_prometheus_client { |client| client }
expect(result).to be_an_instance_of(Gitlab::PrometheusClient)
end
end
context 'when Prometheus is available from settings' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true) expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090') expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
result = described_class.with_prometheus_client { |client| client } it_behaves_like 'query data from Prometheus'
end
expect(result).to be_an_instance_of(Gitlab::PrometheusClient) context 'when Prometheus is available from Consul service discovery' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return('http://localhost:8500')
expect(Gitlab::Consul::Internal).to receive(:discover_prometheus_uri).and_return('http://prom:9090')
end end
it_behaves_like 'query data from Prometheus'
end end
context 'when Prometheus is disabled' do context 'when Prometheus is not available' do
before do before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false) expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return(nil)
end end
it 'returns nil by default' do it 'returns nil by default' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment