Commit 1302eb7c authored by Nick Thomas's avatar Nick Thomas

Merge branch '5612-invert-the-direction-of-geo-metrics-acquisition' into 'master'

Resolve "Invert the direction of Geo metrics acquisition"

Closes #5612

See merge request gitlab-org/gitlab-ee!5934
parents 92da0e49 59b5f127
......@@ -321,6 +321,8 @@ Example response:
}
```
Please note that the `health_status` parameter can only be in a "Healthy" or "Unhealthy" state, while the `health` parameter can be empty, "Healthy", or contain the actual error message.
## Retrieve project sync failures that occurred on the current node
......
......@@ -5,9 +5,6 @@ class GeoNodeStatus < ActiveRecord::Base
after_initialize :initialize_feature_flags
# Whether we were successful in reaching this node
attr_accessor :success
attr_writer :health_status
attr_accessor :storage_shards
attr_accessor :repository_verification_enabled
......@@ -76,14 +73,16 @@ class GeoNodeStatus < ActiveRecord::Base
hashed_storage_attachments_max_id: 'Highest ID present in attachments migrated to hashed storage'
}.freeze
EXPIRATION_IN_MINUTES = 5
HEALTHY_STATUS = 'Healthy'.freeze
UNHEALTHY_STATUS = 'Unhealthy'.freeze
def self.current_node_status
current_node = Gitlab::Geo.current_node
return unless current_node
status = current_node.find_or_build_status
# Since we're retrieving our own data, we mark this as a successful load
status.success = true
status.load_data_from_current_node
status.save if Gitlab::Geo.primary?
......@@ -92,13 +91,14 @@ class GeoNodeStatus < ActiveRecord::Base
end
def self.fast_current_node_status
# Primary's status is easy to calculate so we can calculate it on the fly
return current_node_status if Gitlab::Geo.primary?
spawn_worker
attrs = Rails.cache.read(cache_key)
attrs = Rails.cache.read(cache_key) || {}
if attrs
new(attrs)
else
spawn_worker
nil
end
end
def self.spawn_worker
......@@ -117,9 +117,6 @@ class GeoNodeStatus < ActiveRecord::Base
EXCLUDED_PARAMS = %w[id created_at].freeze
EXTRA_PARAMS = %w[
success
health
health_status
last_event_timestamp
cursor_last_event_timestamp
storage_shards
......@@ -230,14 +227,30 @@ class GeoNodeStatus < ActiveRecord::Base
end
end
alias_attribute :health, :status_message
def healthy?
status_message.blank? || status_message == 'Healthy'.freeze
!outdated? && status_message_healthy?
end
# Health text for this status record.
#
# Returns the stored status_message while the record is not outdated?;
# otherwise returns a fixed notice naming the EXPIRATION_IN_MINUTES window.
def health
  return status_message unless outdated?

  "Status has not been updated in the past #{EXPIRATION_IN_MINUTES} minutes"
end
def health_status
@health_status || (healthy? ? 'Healthy' : 'Unhealthy')
healthy? ? HEALTHY_STATUS : UNHEALTHY_STATUS
end
# Whether this record was last updated more than EXPIRATION_IN_MINUTES ago.
#
# Returns false when updated_at is not set, so a record without a timestamp
# is never reported as outdated.
def outdated?
  last_update = updated_at
  return false unless last_update

  expiry_cutoff = EXPIRATION_IN_MINUTES.minutes.ago
  last_update < expiry_cutoff
end
# Whether the stored status_message itself indicates health: true when it is
# blank or equal to HEALTHY_STATUS. Staleness is not considered here.
def status_message_healthy?
  return true if status_message.blank?

  status_message == HEALTHY_STATUS
end
def last_successful_status_check_timestamp
......
......@@ -5,45 +5,38 @@ module Geo
def execute
return unless Gitlab::Geo.enabled?
if Gitlab::Geo.primary?
fetch_secondary_geo_nodes_metrics
end
current_node_status&.update_cache!
send_status_to_primary(current_node, current_node_status) if Gitlab::Geo.secondary?
fetch_current_geo_node_metrics
update_prometheus_metrics(current_node, current_node_status) if prometheus_enabled?
if Gitlab::Geo.primary? && prometheus_enabled?
Gitlab::Geo.secondary_nodes.find_each { |node| update_prometheus_metrics(node, node.status) }
end
end
private
def fetch_secondary_geo_nodes_metrics
Gitlab::Geo.secondary_nodes.find_each { |node| fetch_geo_node_metrics(node) }
def current_node_status
@current_node_status ||= GeoNodeStatus.current_node_status
end
def fetch_current_geo_node_metrics
fetch_geo_node_metrics(Gitlab::Geo.current_node)
def current_node
@current_node ||= Gitlab::Geo.current_node
end
def fetch_geo_node_metrics(node)
return unless node&.enabled?
status = node_status(node)
unless status.success
def send_status_to_primary(node, status)
if !NodeStatusPostService.new.execute(status) && prometheus_enabled?
increment_failed_status_counter(node)
return
end
update_db_metrics(node, status) if Gitlab::Geo.primary?
status.update_cache! if node.current?
update_prometheus_metrics(node, status) if Gitlab::Metrics.prometheus_metrics_enabled?
end
def update_db_metrics(node, status)
db_status = node.find_or_build_status
def update_prometheus_metrics(node, status)
return unless node&.enabled?
db_status.update_attributes(status.attributes.compact.merge(last_successful_status_check_at: Time.now.utc))
end
return unless status
def update_prometheus_metrics(node, status)
GeoNodeStatus::PROMETHEUS_METRICS.each do |column, docstring|
value = status[column]
......@@ -54,10 +47,6 @@ module Geo
end
end
def node_status(node)
NodeStatusFetchService.new.call(node)
end
def increment_failed_status_counter(node)
failed_status_counter(node).increment
end
......@@ -65,7 +54,7 @@ module Geo
def failed_status_counter(node)
Gitlab::Metrics.counter(
:geo_status_failed_total,
'Total number of times status for Geo node failed to retrieve',
'Total number of times status for Geo node failed to be sent to the primary',
metric_labels(node))
end
......@@ -81,5 +70,9 @@ module Geo
def metric_labels(node)
{ url: node.url }
end
def prometheus_enabled?
Gitlab::Metrics.prometheus_metrics_enabled?
end
end
end
module Geo
  # Fetches a Geo node's status over HTTP and returns it as a GeoNodeStatus.
  class NodeStatusFetchService
    # geo_node - the node whose status is wanted.
    #
    # For the current node this returns GeoNodeStatus.current_node_status
    # without any HTTP request. For any other node it starts from the
    # attributes of the node's existing (or freshly initialized) GeoNodeStatus,
    # marks them as unsuccessful/'Offline', then overlays whatever the HTTP
    # request yields: the parsed response on success, or a health message
    # describing the failure.
    def call(geo_node)
      return GeoNodeStatus.current_node_status if geo_node.current?

      stored_attributes = GeoNodeStatus.find_or_initialize_by(geo_node: geo_node).attributes
      data = stored_attributes.merge(success: false, health_status: 'Offline')

      begin
        response = Gitlab::HTTP.get(geo_node.status_url, allow_local_requests: true, headers: headers, timeout: timeout)
        data[:success] = response.success?
        absorb_response(data, response)
      rescue Gitlab::Geo::GeoNodeNotFoundError
        mark_unhealthy(data, 'This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.')
      rescue OpenSSL::Cipher::CipherError
        mark_unhealthy(data, 'Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.')
      rescue Gitlab::HTTP::Error, Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError => error
        # health_status keeps its initial 'Offline' value for these errors.
        data[:health] = error.message
      end

      GeoNodeStatus.from_json(data.as_json)
    end

    private

    # Copies the outcome of an HTTP response into data (mutated in place).
    def absorb_response(data, response)
      unless response.success?
        data[:health] = connection_failure_message(response)
        return
      end

      if response.parsed_response.is_a?(Hash)
        data.merge!(response.parsed_response)
      else
        data[:health] = 'A JSON response was not received'
      end
    end

    # Builds the health text for a non-successful HTTP response.
    def connection_failure_message(response)
      message = "Could not connect to Geo node - HTTP Status Code: #{response.code} #{response.message}"
      payload = response.parsed_response

      # The return value can be a giant blob of HTML; ignore it
      details = payload.is_a?(Hash) ? payload['message'] : ''

      [message, details].compact.join("\n")
    end

    # Records an error message together with the 'Unhealthy' status.
    def mark_unhealthy(data, message)
      data[:health] = message
      data[:health_status] = 'Unhealthy'
    end

    def headers
      Gitlab::Geo::BaseRequest.new.headers
    end

    def timeout
      Gitlab::CurrentSettings.geo_status_timeout
    end
  end
end
module Geo
  # Sends a status record's attributes to the Geo primary node over HTTP.
  class NodeStatusPostService
    include Gitlab::Geo::LogHelpers

    # POSTs status.attributes to the primary node's status URL.
    #
    # status - object responding to #attributes; it is sent as the request body.
    #
    # Returns true when the response reports success and false otherwise.
    # A missing primary node, a secret decryption failure and the network
    # errors listed below are logged and reported as false rather than raised.
    def execute(status)
      response = Gitlab::HTTP.post(primary_status_url, body: status.attributes, allow_local_requests: true, headers: headers, timeout: timeout)

      return true if response.success?

      handle_failure_for(response)
      false
    rescue Gitlab::Geo::GeoNodeNotFoundError => e
      log_error(e.to_s)
      false
    rescue OpenSSL::Cipher::CipherError => e
      log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
      false
    rescue Gitlab::HTTP::Error, Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError => e
      log_error('Failed to post status data to primary', e)
      false
    end

    private

    # Logs a description of a non-successful response from the primary.
    def handle_failure_for(response)
      message = "Could not connect to Geo primary node - HTTP Status Code: #{response.code} #{response.message}"
      payload = response.parsed_response

      # Only a Hash payload carries a usable message. Anything else can be a
      # giant blob of HTML, so it is dropped: details stays nil and compact
      # removes it, which keeps the logged message free of a trailing newline.
      details = payload['message'] if payload.is_a?(Hash)

      log_error([message, details].compact.join("\n"))
    end

    # URL the status is posted to.
    #
    # Raises Gitlab::Geo::GeoNodeNotFoundError when no primary node is found.
    def primary_status_url
      primary_node = Gitlab::Geo.primary_node
      raise Gitlab::Geo::GeoNodeNotFoundError, 'Failed to look up Geo primary node in the database' unless primary_node

      primary_node.status_url
    end

    def headers
      Gitlab::Geo::BaseRequest.new.headers
    end

    def timeout
      Gitlab::CurrentSettings.geo_status_timeout
    end
  end
end
---
title: "[Geo] Invert the direction of Geo metrics acquisition"
merge_request: 5934
author:
type: changed
......@@ -27,15 +27,18 @@ module API
end
end
# Get node information (e.g. health, repos synced, repos failed, etc.)
# Post current node information to primary (e.g. health, repos synced, repos failed, etc.)
#
# Example request:
# GET /geo/status
get 'status' do
# POST /geo/status
post 'status' do
authenticate_by_gitlab_geo_node_token!
status = ::GeoNodeStatus.fast_current_node_status
present status, with: EE::API::Entities::GeoNodeStatus
db_status = GeoNode.find(params[:geo_node_id]).find_or_build_status
unless db_status.update(params.merge(last_successful_status_check_at: Time.now.utc))
render_validation_error!(db_status)
end
end
end
end
......
......@@ -67,7 +67,7 @@ module API
if geo_node.current?
GeoNodeStatus.fast_current_node_status
else
::Geo::NodeStatusFetchService.new.call(geo_node)
geo_node.status
end
end
end
......
......@@ -275,7 +275,7 @@ namespace :geo do
puts 'N/A'
end
print 'Last status was pulled by primary node: '.rjust(COLUMN_WIDTH)
print 'Last status report was: '.rjust(COLUMN_WIDTH)
if current_node_status.updated_at
puts "#{time_ago_in_words(current_node_status.updated_at)} ago"
......
......@@ -4,7 +4,7 @@ FactoryBot.define do
storage_shards { StorageShard.all }
trait :healthy do
health nil
status_message nil
attachments_count 329
attachments_failed_count 13
attachments_synced_count 141
......@@ -43,7 +43,7 @@ FactoryBot.define do
end
trait :unhealthy do
health "Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest"
status_message "Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest"
end
end
end
......@@ -22,7 +22,7 @@ describe EE::API::Entities::GeoNodeStatus, :postgresql do
context 'when node is unhealthy' do
before do
geo_node_status.health = error
geo_node_status.status_message = error
end
subject { entity.as_json }
......@@ -36,13 +36,13 @@ describe EE::API::Entities::GeoNodeStatus, :postgresql do
describe '#health' do
context 'when node is healthy' do
it 'exposes the health message' do
expect(subject[:health]).to eq 'Healthy'
expect(subject[:health]).to eq GeoNodeStatus::HEALTHY_STATUS
end
end
context 'when node is unhealthy' do
before do
geo_node_status.health = error
geo_node_status.status_message = error
end
subject { entity.as_json }
......
......@@ -28,12 +28,6 @@ describe GeoNodeStatus, :geo do
described_class.fast_current_node_status
end
it 'returns status for primary with no cache' do
stub_current_geo_node(primary)
expect(described_class.fast_current_node_status).to eq described_class.current_node_status
end
end
describe '#update_cache!' do
......@@ -57,7 +51,7 @@ describe GeoNodeStatus, :geo do
context 'when health is present' do
it 'returns true' do
subject.status_message = 'Healthy'
subject.status_message = GeoNodeStatus::HEALTHY_STATUS
expect(subject.healthy?).to be true
end
......@@ -68,6 +62,36 @@ describe GeoNodeStatus, :geo do
expect(subject.healthy?).to be false
end
end
context 'takes outdated? into consideration' do
it 'return false' do
subject.status_message = GeoNodeStatus::HEALTHY_STATUS
subject.updated_at = 10.minutes.ago
expect(subject.healthy?).to be false
end
it 'return false' do
subject.status_message = 'something went wrong'
subject.updated_at = 1.minute.ago
expect(subject.healthy?).to be false
end
end
end
describe '#outdated?' do
it 'return true' do
subject.updated_at = 10.minutes.ago
expect(subject.outdated?).to be true
end
it 'return false' do
subject.updated_at = 1.minute.ago
expect(subject.outdated?).to be false
end
end
describe '#status_message' do
......@@ -78,6 +102,24 @@ describe GeoNodeStatus, :geo do
end
end
describe '#health' do
context 'takes outdated? into consideration' do
it 'returns expiration error' do
subject.status_message = GeoNodeStatus::HEALTHY_STATUS
subject.updated_at = 10.minutes.ago
expect(subject.health).to eq "Status has not been updated in the past #{described_class::EXPIRATION_IN_MINUTES} minutes"
end
it 'returns original message' do
subject.status_message = 'something went wrong'
subject.updated_at = 1.minute.ago
expect(subject.health).to eq 'something went wrong'
end
end
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe '#attachments_synced_count', :delete do
......
......@@ -92,8 +92,8 @@ describe API::GeoNodes, :geo, :prometheus, api: true do
it 'fetches the current node status' do
stub_current_geo_node(secondary)
expect(GeoNodeStatus).to receive(:fast_current_node_status).and_return(secondary_status)
expect(GeoNode).to receive(:find).and_return(secondary)
expect(GeoNodeStatus).to receive(:current_node_status).and_call_original
get api("/geo_nodes/#{secondary.id}/status", admin)
......@@ -101,6 +101,16 @@ describe API::GeoNodes, :geo, :prometheus, api: true do
expect(response).to match_response_schema('public_api/v4/geo_node_status', dir: 'ee')
end
it 'shows 404 response if current node status does not exist yet' do
stub_current_geo_node(secondary)
expect(GeoNode).to receive(:find).and_return(secondary)
get api("/geo_nodes/#{secondary.id}/status", admin)
expect(response).to have_gitlab_http_status(404)
end
it_behaves_like '404 response' do
let(:request) { get api("/geo_nodes/#{unexisting_node_id}/status", admin) }
end
......@@ -124,6 +134,8 @@ describe API::GeoNodes, :geo, :prometheus, api: true do
end
it 'returns 200 for the primary node' do
expect(GeoNodeStatus).to receive(:fast_current_node_status).and_return(secondary_status)
post api("/geo_nodes/#{primary.id}/repair", admin)
expect(response).to have_gitlab_http_status(200)
......
......@@ -13,20 +13,21 @@ describe API::Geo do
{ 'X-Gitlab-Token' => secondary_node.system_hook.token }
end
before do
stub_current_geo_node(secondary_node)
end
shared_examples 'with terms enforced' do
before do
enforce_terms
end
it 'responds with 200' do
it 'responds with 2xx HTTP response code' do
request
expect(response).to have_gitlab_http_status(200)
expect(response).to have_gitlab_http_status(:success)
end
end
describe '/geo/transfers' do
before do
stub_current_geo_node(secondary_node)
end
describe 'GET /geo/transfers/attachment/1' do
......@@ -212,14 +213,52 @@ describe API::Geo do
end
end
end
end
describe 'GET /geo/status', :postgresql do
describe 'POST /geo/status', :postgresql do
let(:geo_base_request) { Gitlab::Geo::BaseRequest.new }
subject(:request) { get api('/geo/status'), nil, geo_base_request.headers }
let(:data) do
{
geo_node_id: secondary_node.id,
status_message: nil,
db_replication_lag_seconds: 0,
repositories_count: 10,
repositories_synced_count: 1,
repositories_failed_count: 2,
wikis_count: 10,
wikis_synced_count: 2,
wikis_failed_count: 3,
lfs_objects_count: 100,
lfs_objects_synced_count: 50,
lfs_objects_failed_count: 12,
lfs_objects_synced_missing_on_primary_count: 4,
job_artifacts_count: 100,
job_artifacts_synced_count: 50,
job_artifacts_failed_count: 12,
job_artifacts_synced_missing_on_primary_count: 5,
attachments_count: 30,
attachments_synced_count: 30,
attachments_failed_count: 25,
attachments_synced_missing_on_primary_count: 6,
last_event_id: 2,
last_event_date: Time.now.utc,
cursor_last_event_id: 1,
cursor_last_event_date: Time.now.utc,
event_log_count: 55,
event_log_max_id: 555,
repository_created_max_id: 43,
repository_updated_max_id: 132,
repository_deleted_max_id: 23,
repository_renamed_max_id: 11,
repositories_changed_max_id: 109
}
end
subject(:request) { post api('/geo/status'), data, geo_base_request.headers }
it 'responds with 401 with invalid auth header' do
get api('/geo/status'), nil, Authorization: 'Test'
post api('/geo/status'), nil, Authorization: 'Test'
expect(response).to have_gitlab_http_status(401)
end
......@@ -232,34 +271,17 @@ describe API::Geo do
expect(response).to have_gitlab_http_status(401)
end
context 'when requesting secondary node with valid auth header' do
before do
stub_current_geo_node(secondary_node)
allow(geo_base_request).to receive(:requesting_node) { primary_node }
allow(::GeoNodeStatus).to receive(:fast_current_node_status).and_return(::GeoNodeStatus.current_node_status)
end
it 'responds with 200' do
request
expect(response).to have_gitlab_http_status(200)
expect(response).to match_response_schema('public_api/v4/geo_node_status', dir: 'ee')
end
it_behaves_like 'with terms enforced'
end
context 'when requesting primary node with valid auth header' do
before do
stub_current_geo_node(primary_node)
allow(geo_base_request).to receive(:requesting_node) { secondary_node }
end
it 'responds with 200' do
request
it 'updates the status and responds with 201' do
expect { request }.to change { GeoNodeStatus.count }.by(1)
expect(response).to have_gitlab_http_status(200)
expect(response).to match_response_schema('public_api/v4/geo_node_status', dir: 'ee')
expect(response).to have_gitlab_http_status(201)
expect(secondary_node.reload.status.repositories_count).to eq(10)
end
it_behaves_like 'with terms enforced'
......
......@@ -13,7 +13,6 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
let(:data) do
{
success: true,
status_message: nil,
db_replication_lag_seconds: 0,
repositories_count: 10,
......@@ -54,7 +53,6 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
let(:primary_data) do
{
success: true,
status_message: nil,
repositories_count: 10,
wikis_count: 10,
......@@ -79,8 +77,8 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
describe '#execute' do
before do
request = double(success?: true, parsed_response: data.stringify_keys, code: 200)
allow(Gitlab::HTTP).to receive(:get).and_return(request)
response = double(success?: true, parsed_response: data.stringify_keys, code: 200)
allow(Gitlab::HTTP).to receive(:post).and_return(response)
end
context 'when current node is nil' do
......@@ -88,8 +86,8 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
stub_current_geo_node(nil)
end
it 'skips fetching the status' do
expect(Gitlab::HTTP).to receive(:get).never
it 'skips posting the status' do
expect(Gitlab::HTTP).to receive(:post).never
subject.execute
end
......@@ -100,9 +98,21 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
stub_current_geo_node(primary)
end
it 'attempts to retrieve metrics from all nodes' do
it 'updates the cache' do
status = GeoNodeStatus.from_json(primary_data.as_json)
allow(GeoNodeStatus).to receive(:current_node_status).and_return(status)
expect(status).to receive(:update_cache!)
subject.execute
end
it 'updates metrics for all nodes' do
allow(GeoNodeStatus).to receive(:current_node_status).and_return(GeoNodeStatus.from_json(primary_data.as_json))
secondary.update(status: GeoNodeStatus.from_json(data.as_json))
another_secondary.update(status: GeoNodeStatus.from_json(data.as_json))
subject.execute
expect(Gitlab::Metrics.registry.get(:geo_db_replication_lag_seconds).values.count).to eq(2)
......@@ -113,29 +123,21 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
end
it 'updates the GeoNodeStatus entry' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(3)
status = secondary.status.load_data_from_current_node
expect(status.geo_node_id).to eq(secondary.id)
expect(status.last_successful_status_check_at).not_to be_nil
end
it 'updates only the active node' do
secondary.update_attributes(enabled: false)
expect { subject.execute }.to change { GeoNodeStatus.count }.by(2)
expect(another_secondary.status).not_to be_nil
expect { subject.execute }.to change { GeoNodeStatus.count }.by(1)
end
end
context 'when node is a secondary' do
subject { described_class.new }
before do
stub_current_geo_node(secondary)
allow(subject).to receive(:node_status).and_return(GeoNodeStatus.new(data))
@status = GeoNodeStatus.new(data.as_json)
allow(GeoNodeStatus).to receive(:current_node_status).and_return(@status)
end
it 'updates the cache' do
expect(@status).to receive(:update_cache!)
subject.execute
end
it 'adds gauges for various metrics' do
......@@ -179,7 +181,7 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
end
it 'increments a counter when metrics fail to retrieve' do
allow(subject).to receive(:node_status).and_return(GeoNodeStatus.new(success: false))
allow_any_instance_of(Geo::NodeStatusPostService).to receive(:execute).and_return(false)
# Run once to get the gauge set
subject.execute
......@@ -187,16 +189,6 @@ describe Geo::MetricsUpdateService, :geo, :prometheus do
expect { subject.execute }.to change { metric_value(:geo_status_failed_total) }.by(1)
end
it 'updates cache' do
status = GeoNodeStatus.new(success: true)
expect(status).to receive(:update_cache!)
allow(subject).to receive(:node_status).and_return(status)
subject.execute
end
it 'does not create GeoNodeStatus entries' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(0)
end
......
require 'spec_helper'
# Specs for Geo::NodeStatusFetchService#call. Every HTTP interaction is stubbed
# on Gitlab::HTTP, which is the client the service calls.
describe Geo::NodeStatusFetchService, :geo do
  include ::EE::GeoHelpers

  set(:primary) { create(:geo_node, :primary) }
  set(:secondary) { create(:geo_node) }

  subject { described_class.new }

  describe '#call' do
    it 'parses a 401 response' do
      response = double(success?: false,
                        code: 401,
                        message: 'Unauthorized',
                        parsed_response: { 'message' => 'Test' })
      allow(Gitlab::HTTP).to receive(:get).and_return(response)

      status = subject.call(secondary)

      expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest")
    end

    it 'always reload GeoNodeStatus if current node' do
      stub_current_geo_node(secondary)

      expect(GeoNodeStatus).to receive(:current_node_status).and_call_original

      status = subject.call(secondary)

      expect(status).to be_a(GeoNodeStatus)
    end

    it 'ignores certain parameters' do
      yesterday = Date.yesterday
      response = double(success?: true,
                        code: 200,
                        message: 'Unauthorized',
                        parsed_response: {
                          'id' => 5000,
                          'last_successful_status_check_at' => yesterday,
                          'created_at' => yesterday,
                          'updated_at' => yesterday
                        })
      # The service issues its request through Gitlab::HTTP, so that is what
      # must be stubbed for the parsed response above to reach the service.
      allow(Gitlab::HTTP).to receive(:get).and_return(response)

      status = subject.call(secondary)

      expect(status.id).not_to be(5000)
      expect(status.last_successful_status_check_at).not_to be(yesterday)
      expect(status.created_at).not_to be(yesterday)
      expect(status.updated_at).not_to be(yesterday)
    end

    it 'parses a 200 legacy response' do
      data = { health: 'OK',
               db_replication_lag_seconds: 0,
               repositories_count: 10,
               repositories_synced_count: 1,
               repositories_failed_count: 2,
               lfs_objects_count: 100,
               lfs_objects_synced_count: 50,
               lfs_objects_failed_count: 12,
               job_artifacts_count: 100,
               job_artifacts_synced_count: 50,
               job_artifacts_failed_count: 12,
               attachments_count: 30,
               attachments_synced_count: 30,
               attachments_failed_count: 25,
               last_event_id: 2,
               last_event_timestamp: Time.now.to_i,
               cursor_last_event_id: 1,
               cursor_last_event_timestamp: Time.now.to_i }
      response = double(success?: true, parsed_response: data.stringify_keys, code: 200)
      allow(Gitlab::HTTP).to receive(:get).and_return(response)

      status = subject.call(secondary)

      expect(status).to have_attributes(data)
      expect(status.success).to be true
    end

    it 'handles invalid JSON response' do
      response = double(success?: true,
                        code: 200,
                        message: 'Something here',
                        parsed_response: 'Something here')
      allow(Gitlab::HTTP).to receive(:get).and_return(response)

      status = subject.call(secondary)

      expect(status.status_message).to eq("A JSON response was not received")
    end

    it 'omits full response text in status' do
      response = double(success?: false,
                        code: 401,
                        message: 'Unauthorized',
                        parsed_response: '<html><h1>You are not allowed</h1></html>')
      allow(Gitlab::HTTP).to receive(:get).and_return(response)

      status = subject.call(secondary)

      expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\n")
      expect(status.success).to be false
    end

    it 'alerts on bad SSL certficate' do
      message = 'bad certificate'
      allow(Gitlab::HTTP).to receive(:get).and_raise(OpenSSL::SSL::SSLError.new(message))

      status = subject.call(secondary)

      expect(status.status_message).to eq(message)
    end

    it 'handles connection refused' do
      allow(Gitlab::HTTP).to receive(:get).and_raise(Errno::ECONNREFUSED.new('bad connection'))

      status = subject.call(secondary)

      expect(status.status_message).to eq('Connection refused - bad connection')
    end

    it 'returns meaningful error message when primary uses incorrect db key' do
      allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError)

      status = subject.call(secondary)

      expect(status.status_message).to eq('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.')
    end

    it 'gracefully handles case when primary is deleted' do
      primary.destroy!

      status = subject.call(secondary)

      expect(status.status_message).to eq('This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.')
    end

    it 'returns the status from database if it could not fetch it' do
      allow(Gitlab::HTTP).to receive(:get).and_raise(Errno::ECONNREFUSED.new('bad connection'))
      db_status = create(:geo_node_status, :healthy, geo_node: secondary)

      status = subject.call(secondary)

      # The failure is reported on the status itself...
      expect(status.status_message).to eq('Connection refused - bad connection')
      expect(status).not_to be_healthy

      # ...while the counters still come from the stored record.
      expect(status.attachments_count).to eq(db_status.attachments_count)
      expect(status.attachments_failed_count).to eq(db_status.attachments_failed_count)
      expect(status.attachments_synced_count).to eq(db_status.attachments_synced_count)
      expect(status.lfs_objects_count).to eq(db_status.lfs_objects_count)
      expect(status.lfs_objects_failed_count).to eq(db_status.lfs_objects_failed_count)
      expect(status.lfs_objects_synced_count).to eq(db_status.lfs_objects_synced_count)
      expect(status.job_artifacts_count).to eq(db_status.job_artifacts_count)
      expect(status.job_artifacts_failed_count).to eq(db_status.job_artifacts_failed_count)
      expect(status.job_artifacts_synced_count).to eq(db_status.job_artifacts_synced_count)
      expect(status.repositories_count).to eq(db_status.repositories_count)
      expect(status.repositories_synced_count).to eq(db_status.repositories_synced_count)
      expect(status.repositories_failed_count).to eq(db_status.repositories_failed_count)
      expect(status.last_event_id).to eq(db_status.last_event_id)
      expect(status.last_event_timestamp).to eq(db_status.last_event_timestamp)
      expect(status.cursor_last_event_id).to eq(db_status.cursor_last_event_id)
      expect(status.cursor_last_event_timestamp).to eq(db_status.cursor_last_event_timestamp)
      expect(status.last_successful_status_check_timestamp).to eq(db_status.last_successful_status_check_timestamp)
    end
  end
end
require 'spec_helper'
# Specs for Geo::NodeStatusPostService#execute. The HTTP client is stubbed in
# every example, so no request leaves the test process.
describe Geo::NodeStatusPostService, :geo do
  include ::EE::GeoHelpers
  include ApiHelpers

  set(:primary) { create(:geo_node, :primary) }
  set(:secondary) { create(:geo_node) }

  # #execute reads status.attributes, so the examples hand it a status record
  # belonging to the secondary rather than the GeoNode itself.
  let(:status) { secondary.find_or_build_status }

  subject { described_class.new }

  describe '#execute' do
    it 'parses a 401 response' do
      response = double(success?: false,
                        code: 401,
                        message: 'Unauthorized',
                        parsed_response: { 'message' => 'Test' })
      allow(Gitlab::HTTP).to receive(:post).and_return(response)
      expect(subject).to receive(:log_error).with("Could not connect to Geo primary node - HTTP Status Code: 401 Unauthorized\nTest")

      expect(subject.execute(status)).to be_falsey
    end

    it 'alerts on bad SSL certficate' do
      message = 'bad certificate'
      allow(Gitlab::HTTP).to receive(:post).and_raise(OpenSSL::SSL::SSLError.new(message))
      expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(OpenSSL::SSL::SSLError))

      expect(subject.execute(status)).to be_falsey
    end

    it 'handles connection refused' do
      allow(Gitlab::HTTP).to receive(:post).and_raise(Errno::ECONNREFUSED.new('bad connection'))
      expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(Errno::ECONNREFUSED))

      expect(subject.execute(status)).to be_falsey
    end

    it 'returns meaningful error message when primary uses incorrect db key' do
      allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError)
      expect(subject).to receive(:log_error).with(
        "Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.",
        kind_of(OpenSSL::Cipher::CipherError)
      )

      expect(subject.execute(status)).to be_falsey
    end

    it 'gracefully handles case when primary is deleted' do
      primary.destroy!
      expect(subject).to receive(:log_error).with(
        'Failed to look up Geo primary node in the database'
      )

      expect(subject.execute(status)).to be_falsey
    end

    it 'sends geo_node_id in the request' do
      stub_current_geo_node(primary)

      expect(Gitlab::HTTP).to receive(:post)
        .with(
          primary.status_url,
          hash_including(body: hash_including('geo_node_id' => secondary.id)))
        .and_return(double(success?: true))

      subject.execute(GeoNodeStatus.new({
        geo_node_id: secondary.id,
        status_message: nil,
        db_replication_lag_seconds: 0,
        repositories_count: 10
      }))
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment