Add Prometheus metrics to track Geo autocorrect numbers

parent 3ab1648e
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180726172057) do
ActiveRecord::Schema.define(version: 20180803001726) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -1159,6 +1159,8 @@ ActiveRecord::Schema.define(version: 20180726172057) do
t.integer "wikis_checksum_failed_count"
t.integer "wikis_checksum_mismatch_count"
t.binary "storage_configuration_digest"
t.integer "repositories_retrying_verification_count"
t.integer "wikis_retrying_verification_count"
end
add_index "geo_node_statuses", ["geo_node_id"], name: "index_geo_node_statuses_on_geo_node_id", unique: true, using: :btree
......
......@@ -58,7 +58,7 @@ Sidekiq jobs may also gather metrics, and these metrics can be accessed if the S
the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
| Metric | Type | Since | Description | Labels |
|:------------------------------------------- |:------- |:----- |:----------- |:------ |
|:-------------------------------------------- |:------- |:----- |:----------- |:------ |
| geo_db_replication_lag_seconds | Gauge | 10.2 | Database replication lag (seconds) | url
| geo_repositories | Gauge | 10.2 | Total number of repositories available on primary | url
| geo_repositories_synced | Gauge | 10.2 | Number of repositories synced on secondary | url
......@@ -90,6 +90,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
| geo_wikis_checksum_mismatch_count | Gauge | 10.7 | Number of wikis that checksum mismatch on secondary | url
| geo_repositories_checked_count | Gauge | 11.1 | Number of repositories that have been checked via `git fsck` | url
| geo_repositories_checked_failed_count | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url
| geo_repositories_retrying_verification_count | Gauge | 11.2 | Number of repository verification failures that Geo is actively trying to correct on secondary | url
| geo_wikis_retrying_verification_count | Gauge | 11.2 | Number of wiki verification failures that Geo is actively trying to correct on secondary | url
### Ruby metrics
......
......@@ -206,6 +206,8 @@ Example response:
"wikis_verification_failed_count": 3,
"wikis_verified_in_percentage": "24.39%",
"wikis_checksum_mismatch_count": 1,
"repositories_retrying_verification_count": 1,
"wikis_retrying_verification_count": 3,
"repositories_checked_count": 7,
"repositories_checked_failed_count": 2,
"repositories_checked_in_percentage": "17.07%",
......@@ -265,6 +267,8 @@ Example response:
"wikis_verification_failed_count": 3,
"wikis_verified_in_percentage": "24.39%",
"wikis_checksum_mismatch_count": 1,
"repositories_retrying_verification_count": 4,
"wikis_retrying_verification_count": 2,
"repositories_checked_count": 5,
"repositories_checked_failed_count": 1,
"repositories_checked_in_percentage": "12.20%",
......
......@@ -88,6 +88,14 @@ module Geo
Geo::ProjectRegistry.wiki_checksum_mismatch.count
end
# Returns the number of records in the
# Geo::ProjectRegistry.repositories_retrying_verification relation.
def count_repositories_retrying_verification
  retrying_registries = Geo::ProjectRegistry.repositories_retrying_verification
  retrying_registries.count
end
# Returns the number of records in the
# Geo::ProjectRegistry.wikis_retrying_verification relation.
def count_wikis_retrying_verification
  retrying_registries = Geo::ProjectRegistry.wikis_retrying_verification
  retrying_registries.count
end
def count_verification_failed_repositories
find_verification_failed_project_registries('repository').count
end
......
......@@ -53,6 +53,20 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(repository_checksum_mismatch.or(wiki_checksum_mismatch))
end
# Registries with a positive repository_verification_retry_count whose
# resync_repository flag is true. Both conditions must hold.
def self.repositories_retrying_verification
  has_retries = arel_table[:repository_verification_retry_count].gt(0)
  flagged_for_resync = arel_table[:resync_repository].eq(true)

  where(has_retries.and(flagged_for_resync))
end
# Registries with a positive wiki_verification_retry_count whose
# resync_wiki flag is true. Both conditions must hold.
def self.wikis_retrying_verification
  has_retries = arel_table[:wiki_verification_retry_count].gt(0)
  flagged_for_resync = arel_table[:resync_wiki].eq(true)

  where(has_retries.and(flagged_for_resync))
end
def self.retry_due
where(
arel_table[:repository_retry_at].lt(Time.now)
......
......@@ -77,7 +77,9 @@ class GeoNodeStatus < ActiveRecord::Base
hashed_storage_migrated_max_id: 'Highest ID present in projects migrated to hashed storage',
hashed_storage_attachments_max_id: 'Highest ID present in attachments migrated to hashed storage',
repositories_checked_count: 'Number of repositories checked',
repositories_checked_failed_count: 'Number of failed repositories checked'
repositories_checked_failed_count: 'Number of failed repositories checked',
repositories_retrying_verification_count: 'Number of repositories verification failures that Geo is actively trying to correct on secondary',
wikis_retrying_verification_count: 'Number of wikis verification failures that Geo is actively trying to correct on secondary'
}.freeze
EXPIRATION_IN_MINUTES = 5
......@@ -238,6 +240,8 @@ class GeoNodeStatus < ActiveRecord::Base
self.wikis_verified_count = projects_finder.count_verified_wikis
self.wikis_verification_failed_count = projects_finder.count_verification_failed_wikis
self.wikis_checksum_mismatch_count = projects_finder.count_wikis_checksum_mismatch
self.repositories_retrying_verification_count = projects_finder.count_repositories_retrying_verification
self.wikis_retrying_verification_count = projects_finder.count_wikis_retrying_verification
end
end
......
# frozen_string_literal: true
# Adds two integer columns to geo_node_statuses for storing the
# "retrying verification" counts of repositories and wikis.
class AddVerificationRetryCountsToGeoNodeStatuses < ActiveRecord::Migration
  DOWNTIME = false

  def change
    # Each column is added with the same type and no extra options.
    %i[
      repositories_retrying_verification_count
      wikis_retrying_verification_count
    ].each do |column_name|
      add_column :geo_node_statuses, column_name, :integer
    end
  end
end
......@@ -331,6 +331,9 @@ module EE
end
expose :wikis_checksum_mismatch_count
expose :repositories_retrying_verification_count
expose :wikis_retrying_verification_count
expose :replication_slots_count
expose :replication_slots_used_count
expose :replication_slots_used_in_percentage do |node|
......
......@@ -33,6 +33,8 @@ FactoryBot.define do
wikis_verified_count 499
wikis_verification_failed_count 99
wikis_checksum_mismatch_count 10
repositories_retrying_verification_count 25
wikis_retrying_verification_count 3
last_event_id 2
last_event_timestamp { Time.now.to_i }
cursor_last_event_id 1
......
......@@ -40,6 +40,8 @@
"wikis_verification_failed_count",
"wikis_verified_in_percentage",
"wikis_checksum_mismatch_count",
"repositories_retrying_verification_count",
"wikis_retrying_verification_count",
"repositories_checked_count",
"repositories_checked_failed_count",
"repositories_checked_in_percentage",
......@@ -103,6 +105,8 @@
"wikis_verification_failed_count": { "type": ["integer", "null"] },
"wikis_verified_in_percentage": { "type": "string" },
"wikis_checksum_mismatch_count": { "type": ["integer", "null"] },
"repositories_retrying_verification_count": { "type": ["integer", "null"] },
"wikis_retrying_verification_count": { "type": ["integer", "null"] },
"repositories_checked_count": { "type": ["integer", "null"] },
"repositories_checked_failed_count": { "type": ["integer", "null"] },
"repositories_checked_in_percentage": { "type": "string" },
......
......@@ -712,6 +712,27 @@ describe GeoNodeStatus, :geo do
end
end
describe '#repositories_retrying_verification_count' do
  before do
    stub_current_geo_node(secondary)
  end

  it 'returns the right number of repositories retrying verification' do
    # Only the first registry is created with an explicit positive retry count,
    # so it is the only one expected to be counted.
    create(:geo_project_registry, :repository_verification_failed, repository_verification_retry_count: 1)
    create(:geo_project_registry, :repository_verification_failed, repository_verification_retry_count: nil)
    create(:geo_project_registry, :repository_verified)

    expect(subject.repositories_retrying_verification_count).to eq(1)
  end

  it 'returns existing value when feature flag is off' do
    allow(Gitlab::Geo).to receive(:repository_verification_enabled?).and_return(false)
    # 25 is the value the :geo_node_status factory assigns to this attribute.
    create(:geo_node_status, :healthy, geo_node: secondary)

    expect(subject.repositories_retrying_verification_count).to eq(25)
  end
end
describe '#wikis_verified_count' do
before do
stub_current_geo_node(secondary)
......@@ -773,6 +794,27 @@ describe GeoNodeStatus, :geo do
end
end
describe '#wikis_retrying_verification_count' do
  before do
    stub_current_geo_node(secondary)
  end

  it 'returns the right number of wikis retrying verification' do
    # Only the first registry is created with an explicit positive retry count,
    # so it is the only one expected to be counted.
    create(:geo_project_registry, :wiki_verification_failed, wiki_verification_retry_count: 1)
    create(:geo_project_registry, :wiki_verification_failed, wiki_verification_retry_count: nil)
    create(:geo_project_registry, :wiki_verified)

    expect(subject.wikis_retrying_verification_count).to eq(1)
  end

  it 'returns existing value when feature flag is off' do
    allow(Gitlab::Geo).to receive(:repository_verification_enabled?).and_return(false)
    # 3 is the value the :geo_node_status factory assigns to this attribute.
    create(:geo_node_status, :healthy, geo_node: secondary)

    expect(subject.wikis_retrying_verification_count).to eq(3)
  end
end
describe '#last_event_id and #last_event_date' do
it 'returns nil when no events are available' do
expect(subject.last_event_id).to be_nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment