Commit e811a8c6 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'sh-check-replication-secondary' into 'master'

Add a health check for the state of Geo replication lag in secondary

Closes #1929

See merge request !1610
parents 6f7f990f 45e09f21
......@@ -13,6 +13,7 @@ class GeoNodeStatus {
this.$el = $(el);
this.$icon = $('.js-geo-node-icon', this.$el);
this.$loadingIcon = $('.js-geo-node-loading', this.$el);
this.$dbReplicationLag = $('.js-db-replication-lag', this.$status);
this.$healthStatus = $('.js-health-status', this.$el);
this.$status = $('.js-geo-node-status', this.$el);
this.$repositoriesSynced = $('.js-repositories-synced', this.$status);
......@@ -36,6 +37,15 @@ class GeoNodeStatus {
$.getJSON(this.endpoint, (status) => {
this.setStatusIcon(status.healthy);
this.setHealthStatus(status.healthy);
// Replication lag can be nil if the secondary isn't actually streaming
if (status.db_replication_lag) {
const parsedTime = gl.utils.prettyTime.parseSeconds(status.db_replication_lag);
this.$dbReplicationLag.text(gl.utils.prettyTime.stringifyTime(parsedTime));
} else {
this.$dbReplicationLag.text('UNKNOWN');
}
this.$repositoriesSynced.html(`${status.repositories_synced_count}/${status.repositories_count} (${status.repositories_synced_in_percentage})`);
this.$repositoriesFailed.html(status.repositories_failed_count);
this.$lfsObjectsSynced.html(`${status.lfs_objects_synced_count}/${status.lfs_objects_count} (${status.lfs_objects_synced_in_percentage})`);
......
......@@ -14,6 +14,14 @@ class GeoNodeStatus
health.blank?
end
def db_replication_lag
@db_replication_lag ||= Gitlab::Geo::HealthCheck.db_replication_lag
end
def db_replication_lag=(value)
@db_replication_lag = value
end
def repositories_count
@repositories_count ||= repositories.count
end
......
......@@ -14,6 +14,8 @@ class GeoNodeStatusEntity < Grape::Entity
number_to_percentage(node.attachments_synced_in_percentage, precision: 2)
end
expose :db_replication_lag
expose :lfs_objects_count
expose :lfs_objects_synced_count
expose :lfs_objects_synced_in_percentage do |node|
......
......@@ -5,6 +5,7 @@ module Geo
KEYS = %w(
health
db_replication_lag
repositories_count
repositories_synced_count
repositories_failed_count
......
......@@ -45,6 +45,10 @@
%span.help-block
Health Status:
%span.js-health-status
%p
%span.help-block
Database replication lag:
%strong.node-info.js-db-replication-lag
%p
%span.help-block
Repositories synced:
......
......@@ -947,6 +947,7 @@ module API
class GeoNodeStatus < Grape::Entity
expose :id
expose :db_replication_lag
expose :health
expose :healthy?, as: :healthy
expose :repositories_count
......
......@@ -7,6 +7,7 @@ module Gitlab
return '' unless Gitlab::Geo.secondary?
return 'The Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured?
return 'The Geo node has a database that is not configured for streaming replication with the primary node.' unless self.database_secondary?
return 'The Geo node does not appear to be replicating data from the primary node.' unless self.db_replication_lag.present?
database_version = self.get_database_version.to_i
migration_version = self.get_migration_version.to_i
......@@ -54,12 +55,24 @@ module Gitlab
end
def self.database_secondary?
raise NotImplementedError unless Gitlab::Database.postgresql?
ActiveRecord::Base.connection.execute('SELECT pg_is_in_recovery()')
.first
.fetch('pg_is_in_recovery') == 't'
end
def self.db_replication_lag
# Obtain the replication lag in seconds
ActiveRecord::Base.connection.execute('
SELECT CASE
WHEN pg_last_xlog_receive_location() = pg_last_xlog_replay_location()
THEN 0
ELSE
EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER
END
AS replication_lag')
.first
.fetch('replication_lag')
end
end
end
end
......@@ -8,6 +8,7 @@
"attachments_synced_count",
"lfs_objects_count",
"lfs_objects_synced_count",
"db_replication_lag",
"repositories_count",
"repositories_failed_count",
"repositories_synced_count"
......@@ -19,6 +20,7 @@
"attachments_count": { "type": "integer" },
"attachments_synced_count": { "type": "integer" },
"attachments_synced_in_percentage": { "type": "string" },
"db_replication_lag": { "type": ["integer", "null"] },
"lfs_objects_count": { "type": "integer" },
"lfs_objects_synced_count": { "type": "integer" },
"lfs_objects_synced_in_percentage": { "type": "string" },
......
......@@ -12,6 +12,7 @@ describe Gitlab::Geo::HealthCheck, :postgresql do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:get_database_version).and_return('20170101')
allow(described_class).to receive(:get_migration_version).and_return('20170201')
allow(described_class).to receive(:db_replication_lag).and_return(0)
message = subject.perform_checks
......@@ -27,8 +28,18 @@ describe Gitlab::Geo::HealthCheck, :postgresql do
it 'returns an error when database is not configured for streaming replication' do
allow(Gitlab::Geo).to receive(:secondary?) { true }
allow(Gitlab::Geo).to receive(:configured?) { true }
allow(Gitlab::Database).to receive(:postgresql?) { true }
allow(ActiveRecord::Base).to receive_message_chain(:connection, :execute, :first, :fetch) { 'f' }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).not_to be_blank
end
it 'returns an error when streaming replication is not working' do
allow(Gitlab::Geo).to receive(:secondary?) { true }
allow(Gitlab::Geo).to receive(:configured?) { true }
allow(Gitlab::Database).to receive(:postgresql?) { true }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).to include('not configured for streaming replication')
end
......@@ -42,6 +53,7 @@ describe Gitlab::Geo::HealthCheck, :postgresql do
it 'returns an error when Geo database version does not match the latest migration version' do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(subject).to receive(:get_database_version) { 1 }
allow(described_class).to receive(:db_replication_lag).and_return(0)
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/)
end
......@@ -49,9 +61,17 @@ describe Gitlab::Geo::HealthCheck, :postgresql do
it 'returns an error when latest migration version does not match the Geo database version' do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(subject).to receive(:get_migration_version) { 1 }
allow(described_class).to receive(:db_replication_lag).and_return(0)
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/)
end
it 'returns an error when replication lag is not present' do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:db_replication_lag).and_return(nil)
expect(subject.perform_checks).to match(/The Geo node does not appear to be replicating data from the primary node/)
end
end
describe 'MySQL checks' do
......
......@@ -98,6 +98,14 @@ describe GeoNodeStatus do
end
end
describe '#db_replication_lag' do
it 'returns the set replication lag' do
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag).and_return(1000)
expect(subject.db_replication_lag).to eq(1000)
end
end
describe '#lfs_objects_synced_in_percentage' do
let(:lfs_object_project) { create(:lfs_objects_project, project: project_1) }
......@@ -164,6 +172,7 @@ describe GeoNodeStatus do
context 'when no values are available' do
it 'returns 0 for each attribute' do
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag).and_return(nil)
subject.attachments_count = nil
subject.attachments_synced_count = nil
subject.lfs_objects_count = nil
......@@ -172,6 +181,7 @@ describe GeoNodeStatus do
subject.repositories_synced_count = nil
subject.repositories_failed_count = nil
expect(subject.db_replication_lag).to be_nil
expect(subject.repositories_count).to be_zero
expect(subject.repositories_synced_count).to be_zero
expect(subject.repositories_synced_in_percentage).to be_zero
......
......@@ -25,6 +25,7 @@ describe Geo::NodeStatusService do
it 'parses a 200 response' do
data = { health: 'OK',
db_replication_lag: 0,
repositories_count: 10,
repositories_synced_count: 1,
repositories_failed_count: 2,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment