Commit 1eb6f691 authored by Nick Thomas's avatar Nick Thomas

Make the maximum capacity of Geo backfill operations configurable

parent 58397d10
...@@ -29,3 +29,17 @@ ...@@ -29,3 +29,17 @@
= hidden_field_tag "#{form.object_name}[namespace_ids]", geo_node.namespace_ids.join(","), class: 'js-geo-node-namespaces', data: { selected: node_namespaces_options(geo_node.namespaces).to_json } = hidden_field_tag "#{form.object_name}[namespace_ids]", geo_node.namespace_ids.join(","), class: 'js-geo-node-namespaces', data: { selected: node_namespaces_options(geo_node.namespaces).to_json }
.help-block .help-block
#{ s_("Choose which groups you wish to replicate to this secondary node. Leave blank to replicate all.") } #{ s_("Choose which groups you wish to replicate to this secondary node. Leave blank to replicate all.") }
.form-group.js-hide-if-geo-primary{ class: ('hidden' unless geo_node.secondary?) }
= form.label :repos_max_capacity, s_('Geo|Repository sync capacity'), class: 'control-label'
.col-sm-10
= form.number_field :repos_max_capacity, class: 'form-control', min: 0
.help-block
#{ s_('Control the maximum concurrency of repository backfill for this secondary node') }
.form-group.js-hide-if-geo-primary{ class: ('hidden' unless geo_node.secondary?) }
= form.label :files_max_capacity, s_('Geo|File sync capacity'), class: 'control-label'
.col-sm-10
= form.number_field :files_max_capacity, class: 'form-control', min: 0
.help-block
#{ s_('Control the maximum concurrency of LFS/attachment backfill for this secondary node') }
...@@ -5,7 +5,6 @@ module Geo ...@@ -5,7 +5,6 @@ module Geo
DB_RETRIEVE_BATCH_SIZE = 1000 DB_RETRIEVE_BATCH_SIZE = 1000
LEASE_TIMEOUT = 60.minutes LEASE_TIMEOUT = 60.minutes
MAX_CAPACITY = 10
RUN_TIME = 60.minutes.to_i RUN_TIME = 60.minutes.to_i
attr_reader :pending_resources, :scheduled_jobs, :start_time, :loops attr_reader :pending_resources, :scheduled_jobs, :start_time, :loops
...@@ -18,7 +17,7 @@ module Geo ...@@ -18,7 +17,7 @@ module Geo
# The scheduling works as the following: # The scheduling works as the following:
# #
# 1. Load a batch of IDs that we need to schedule (DB_RETRIEVE_BATCH_SIZE) into a pending list. # 1. Load a batch of IDs that we need to schedule (DB_RETRIEVE_BATCH_SIZE) into a pending list.
# 2. Schedule them so that at most MAX_CAPACITY are running at once. # 2. Schedule them so that at most `max_capacity` are running at once.
# 3. When a slot frees, schedule another job. # 3. When a slot frees, schedule another job.
# 4. When we have drained the pending list, load another batch into memory, and schedule the # 4. When we have drained the pending list, load another batch into memory, and schedule the
# remaining jobs, excluding ones in progress. # remaining jobs, excluding ones in progress.
...@@ -82,7 +81,7 @@ module Geo ...@@ -82,7 +81,7 @@ module Geo
end end
def max_capacity def max_capacity
MAX_CAPACITY raise NotImplementedError
end end
def run_time def run_time
......
...@@ -2,6 +2,10 @@ module Geo ...@@ -2,6 +2,10 @@ module Geo
class FileDownloadDispatchWorker < Geo::BaseSchedulerWorker class FileDownloadDispatchWorker < Geo::BaseSchedulerWorker
private private
# Upper bound on how many jobs this worker keeps running at once.
# The value is read from the current Geo node's `files_max_capacity` setting.
def max_capacity
current_node.files_max_capacity
end
def schedule_job(object_db_id, object_type) def schedule_job(object_db_id, object_type)
job_id = GeoFileDownloadWorker.perform_async(object_type, object_db_id) job_id = GeoFileDownloadWorker.perform_async(object_type, object_db_id)
......
module Geo module Geo
class RepositorySyncWorker < Geo::BaseSchedulerWorker class RepositorySyncWorker < Geo::BaseSchedulerWorker
MAX_CAPACITY = 25
private private
def max_capacity def max_capacity
MAX_CAPACITY current_node.repos_max_capacity
end end
def schedule_job(project_id) def schedule_job(project_id)
......
---
title: Make the maximum capacity of Geo backfill operations configurable
merge_request: 3107
author:
type: added
# Adds two NOT NULL integer columns to `geo_nodes`:
# `files_max_capacity` (default 10) and `repos_max_capacity` (default 25).
class GeoConfigurableMaxCapacities < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Column name => default value, listed in the order the columns are
  # created (and later removed).
  CAPACITY_DEFAULTS = {
    files_max_capacity: 10,
    repos_max_capacity: 25
  }.freeze

  disable_ddl_transaction!

  # Creates each capacity column with its default, disallowing NULLs.
  def up
    CAPACITY_DEFAULTS.each do |column, default|
      add_column_with_default :geo_nodes, column, :integer, allow_null: false, default: default
    end
  end

  # Drops the capacity columns again.
  def down
    CAPACITY_DEFAULTS.each_key do |column|
      remove_column :geo_nodes, column, :integer
    end
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171006091000) do ActiveRecord::Schema.define(version: 20171010140746) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -758,6 +758,8 @@ ActiveRecord::Schema.define(version: 20171006091000) do ...@@ -758,6 +758,8 @@ ActiveRecord::Schema.define(version: 20171006091000) do
t.string "encrypted_secret_access_key" t.string "encrypted_secret_access_key"
t.string "encrypted_secret_access_key_iv" t.string "encrypted_secret_access_key_iv"
t.string "clone_url_prefix" t.string "clone_url_prefix"
t.integer "files_max_capacity", default: 10, null: false
t.integer "repos_max_capacity", default: 25, null: false
end end
add_index "geo_nodes", ["access_key"], name: "index_geo_nodes_on_access_key", using: :btree add_index "geo_nodes", ["access_key"], name: "index_geo_nodes_on_access_key", using: :btree
......
...@@ -5,13 +5,44 @@ For more information about setting up GitLab Geo, read the ...@@ -5,13 +5,44 @@ For more information about setting up GitLab Geo, read the
When you're done, you can navigate to **Admin area ➔ Geo nodes** (`/admin/geo_nodes`). When you're done, you can navigate to **Admin area ➔ Geo nodes** (`/admin/geo_nodes`).
In the following table you can see what all these settings mean:

## Common settings
All Geo nodes have the following settings:
| Setting | Description | | Setting | Description |
| --------- | ----------- | | --------| ----------- |
| Primary | This marks a Geo Node as primary. There can be only one primary, make sure that you first add the primary node and then all the others. | | Primary | This marks a Geo Node as primary. There can be only one primary, make sure that you first add the primary node and then all the others. |
| URL | Your instance's full URL, in the same way it is configured in `/etc/gitlab/gitlab.rb` (Omnibus GitLab installations) or `gitlab.yml` (source based installations). | | URL | The instance's full URL, in the same way it is configured in `/etc/gitlab/gitlab.rb` (Omnibus GitLab installations) or `gitlab.yml` (source based installations). |
The node you're reading from is indicated with a green `Current node` label, and
the primary is given a blue `Primary` label. Remember that you can only make
changes on the primary!
## Secondary node settings
Secondaries have a number of additional settings available:
| Setting | Description|
|--------------------------|------------|
| Public Key | The SSH public key of the user that your GitLab instance runs on (unless changed, should be the user `git`). | | Public Key | The SSH public key of the user that your GitLab instance runs on (unless changed, should be the user `git`). |
| Groups to replicate | Enable Geo selective sync for this secondary - only the selected groups will be synchronized. |
| Repository sync capacity | Number of concurrent requests this secondary will make to the primary when backfilling repositories. |
| File sync capacity | Number of concurrent requests this secondary will make to the primary when backfilling files. |
## Geo backfill
Secondaries are notified of changes to repositories and files by the primary,
and will always attempt to synchronize those changes as quickly as possible.
Backfill is the act of populating the secondary with repositories and files that
existed *before* the secondary was added to the database. Since there may be
extremely large numbers of repositories and files, it's infeasible to attempt to
download them all at once, so GitLab places an upper limit on the concurrency of
these operations.
A primary node will have a star right next to it to distinguish it from the secondaries.
How long the backfill takes is a function of the maximum concurrency, but higher values place more strain on the primary node. From [GitLab 10.2](https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/3107),
the limits are configurable - if your primary node has lots of surplus capacity,
you can increase the values to complete backfill in a shorter time. If it's
under heavy load and backfill is reducing its availability for normal requests,
you can decrease them.
require 'spec_helper' require 'spec_helper'
describe GeoNode, type: :model do describe GeoNode, type: :model do
using RSpec::Parameterized::TableSyntax
include ::EE::GeoHelpers include ::EE::GeoHelpers
let(:new_node) { create(:geo_node, schema: 'https', host: 'localhost', port: 3000, relative_url_root: 'gitlab') } let(:new_node) { create(:geo_node, schema: 'https', host: 'localhost', port: 3000, relative_url_root: 'gitlab') }
...@@ -29,28 +30,22 @@ describe GeoNode, type: :model do ...@@ -29,28 +30,22 @@ describe GeoNode, type: :model do
context 'default values' do context 'default values' do
let(:gitlab_host) { 'gitlabhost' } let(:gitlab_host) { 'gitlabhost' }
before do where(:attribute, :value) do
allow(Gitlab.config.gitlab).to receive(:host) { gitlab_host } :schema | 'http'
end :host | 'gitlabhost'
:port | 80
it 'defines a default schema' do :relative_url_root | ''
expect(empty_node.schema).to eq('http') :primary | false
end :repos_max_capacity | 25
:files_max_capacity | 10
it 'defines a default host' do
expect(empty_node.host).to eq(gitlab_host)
end
it 'defines a default port' do
expect(empty_node.port).to eq(80)
end end
it 'defines a default relative_url_root' do with_them do
expect(empty_node.relative_url_root).to eq('') before do
allow(Gitlab.config.gitlab).to receive(:host) { gitlab_host }
end end
it 'defines a default primary flag' do it { expect(empty_node[attribute]).to eq(value) }
expect(empty_node.primary).to eq(false)
end end
end end
......
...@@ -61,7 +61,7 @@ describe Geo::FileDownloadDispatchWorker, :postgresql do ...@@ -61,7 +61,7 @@ describe Geo::FileDownloadDispatchWorker, :postgresql do
# 2. We send 2, wait for 1 to finish, and then send again. # 2. We send 2, wait for 1 to finish, and then send again.
it 'attempts to load a new batch without pending downloads' do it 'attempts to load a new batch without pending downloads' do
stub_const('Geo::BaseSchedulerWorker::DB_RETRIEVE_BATCH_SIZE', 5) stub_const('Geo::BaseSchedulerWorker::DB_RETRIEVE_BATCH_SIZE', 5)
stub_const('Geo::BaseSchedulerWorker::MAX_CAPACITY', 2) secondary.update!(files_max_capacity: 2)
allow_any_instance_of(::Gitlab::Geo::Transfer).to receive(:download_from_primary).and_return(100) allow_any_instance_of(::Gitlab::Geo::Transfer).to receive(:download_from_primary).and_return(100)
avatar = fixture_file_upload(Rails.root.join('spec/fixtures/dk.png')) avatar = fixture_file_upload(Rails.root.join('spec/fixtures/dk.png'))
......
...@@ -102,7 +102,7 @@ describe Geo::RepositorySyncWorker, :postgresql do ...@@ -102,7 +102,7 @@ describe Geo::RepositorySyncWorker, :postgresql do
before do before do
allow_any_instance_of(described_class).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave allow_any_instance_of(described_class).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
allow_any_instance_of(described_class).to receive(:max_capacity).and_return(3) # Must be more than db_retrieve_batch_size secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo')) allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
allow_any_instance_of(Geo::ProjectSyncWorker).to receive(:sync_wiki?).and_return(false) allow_any_instance_of(Geo::ProjectSyncWorker).to receive(:sync_wiki?).and_return(false)
allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches) allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment