Commit 1eb6f691 authored by Nick Thomas

Make the maximum capacity of Geo backfill operations configurable

parent 58397d10
......@@ -29,3 +29,17 @@
= hidden_field_tag "#{form.object_name}[namespace_ids]", geo_node.namespace_ids.join(","), class: 'js-geo-node-namespaces', data: { selected: node_namespaces_options(geo_node.namespaces).to_json }
.help-block
#{ s_("Choose which groups you wish to replicate to this secondary node. Leave blank to replicate all.") }
.form-group.js-hide-if-geo-primary{ class: ('hidden' unless geo_node.secondary?) }
= form.label :repos_max_capacity, s_('Geo|Repository sync capacity'), class: 'control-label'
.col-sm-10
= form.number_field :repos_max_capacity, class: 'form-control', min: 0
.help-block
#{ s_('Control the maximum concurrency of repository backfill for this secondary node') }
.form-group.js-hide-if-geo-primary{ class: ('hidden' unless geo_node.secondary?) }
= form.label :files_max_capacity, s_('Geo|File sync capacity'), class: 'control-label'
.col-sm-10
= form.number_field :files_max_capacity, class: 'form-control', min: 0
.help-block
#{ s_('Control the maximum concurrency of LFS/attachment backfill for this secondary node') }
......@@ -5,7 +5,6 @@ module Geo
DB_RETRIEVE_BATCH_SIZE = 1000
LEASE_TIMEOUT = 60.minutes
MAX_CAPACITY = 10
RUN_TIME = 60.minutes.to_i
attr_reader :pending_resources, :scheduled_jobs, :start_time, :loops
......@@ -18,7 +17,7 @@ module Geo
# The scheduling works as the following:
#
# 1. Load a batch of IDs that we need to schedule (DB_RETRIEVE_BATCH_SIZE) into a pending list.
# 2. Schedule them so that at most MAX_CAPACITY are running at once.
# 2. Schedule them so that at most `max_capacity` are running at once.
# 3. When a slot frees, schedule another job.
# 4. When we have drained the pending list, load another batch into memory, and schedule the
# remaining jobs, excluding ones in progress.
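#
# For illustration only (not part of this commit), a minimal sketch of one
# capacity-limited scheduling pass following steps 1-4 above. The wrapper
# method name is hypothetical; max_capacity, schedule_job, pending_resources
# and scheduled_jobs are hooks this class already exposes:
#
#   def schedule_pending_jobs
#     while scheduled_jobs.size < max_capacity && pending_resources.any?
#       resource = pending_resources.shift
#       job = schedule_job(*resource) # subclasses enqueue the actual Sidekiq worker
#       scheduled_jobs << job if job
#     end
#     # Once the pending list drains, the next pass loads another
#     # DB_RETRIEVE_BATCH_SIZE batch, skipping IDs that are still in progress.
#   end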
......@@ -82,7 +81,7 @@ module Geo
end
def max_capacity
MAX_CAPACITY
raise NotImplementedError
end
def run_time
......
......@@ -2,6 +2,10 @@ module Geo
class FileDownloadDispatchWorker < Geo::BaseSchedulerWorker
private
def max_capacity
current_node.files_max_capacity
end
def schedule_job(object_db_id, object_type)
job_id = GeoFileDownloadWorker.perform_async(object_type, object_db_id)
......
module Geo
class RepositorySyncWorker < Geo::BaseSchedulerWorker
MAX_CAPACITY = 25
private
def max_capacity
MAX_CAPACITY
current_node.repos_max_capacity
end
def schedule_job(project_id)
......
---
title: Make the maximum capacity of Geo backfill operations configurable
merge_request: 3107
author:
type: added
class GeoConfigurableMaxCapacities < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default :geo_nodes, :files_max_capacity, :integer, allow_null: false, default: 10
add_column_with_default :geo_nodes, :repos_max_capacity, :integer, allow_null: false, default: 25
end
def down
remove_column :geo_nodes, :files_max_capacity, :integer
remove_column :geo_nodes, :repos_max_capacity, :integer
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171006091000) do
ActiveRecord::Schema.define(version: 20171010140746) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -758,6 +758,8 @@ ActiveRecord::Schema.define(version: 20171006091000) do
t.string "encrypted_secret_access_key"
t.string "encrypted_secret_access_key_iv"
t.string "clone_url_prefix"
t.integer "files_max_capacity", default: 10, null: false
t.integer "repos_max_capacity", default: 25, null: false
end
add_index "geo_nodes", ["access_key"], name: "index_geo_nodes_on_access_key", using: :btree
......
......@@ -5,13 +5,44 @@ For more information about setting up GitLab Geo, read the
When you're done, you can navigate to **Admin area ➔ Geo nodes** (`/admin/geo_nodes`).
In the following table you can see what all these settings mean:
## Common settings
All Geo nodes have the following settings:
| Setting | Description |
| --------- | ----------- |
| --------| ----------- |
| Primary | This marks a Geo Node as primary. There can be only one primary; make sure that you add the primary node first and then all the others. |
| URL | Your instance's full URL, in the same way it is configured in `/etc/gitlab/gitlab.rb` (Omnibus GitLab installations) or `gitlab.yml` (source based installations). |
| URL | The instance's full URL, in the same way it is configured in `/etc/gitlab/gitlab.rb` (Omnibus GitLab installations) or `gitlab.yml` (source based installations). |
The node you're reading from is indicated with a green `Current node` label, and
the primary is given a blue `Primary` label. Remember that you can only make
changes on the primary!
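For example, on an Omnibus GitLab installation the URL entered here would
normally match the node's `external_url` (the hostname below is illustrative):

```ruby
# /etc/gitlab/gitlab.rb
external_url 'https://gitlab.example.com'
```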
## Secondary node settings
Secondaries have a number of additional settings available:
| Setting | Description |
|--------------------------|------------|
| Public Key | The SSH public key of the user that your GitLab instance runs as (unless changed, this is the user `git`). |
| Groups to replicate | Enable Geo selective sync for this secondary - only the selected groups will be synchronized. |
| Repository sync capacity | Number of concurrent requests this secondary will make to the primary when backfilling repositories. |
| File sync capacity | Number of concurrent requests this secondary will make to the primary when backfilling files. |
## Geo backfill
Secondaries are notified of changes to repositories and files by the primary,
and will always attempt to synchronize those changes as quickly as possible.
Backfill is the act of populating the secondary with repositories and files that
existed *before* the secondary was added to the database. Since there may be
extremely large numbers of repositories and files, it's infeasible to attempt to
download them all at once, so GitLab places an upper limit on the concurrency of
these operations.
A primary node will have a star right next to it to distinguish it from the
secondaries.
How long the backfill takes is a function of the maximum concurrency, but higher
values place more strain on the primary node. From [GitLab 10.2](https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/3107),
the limits are configurable. If your primary node has lots of surplus capacity,
you can increase the values to complete backfill in a shorter time. If it's
under heavy load and backfill is reducing its availability for normal requests,
you can decrease them.
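The supported way to change these limits is the **Admin area ➔ Geo nodes** form
described above. As a rough sketch only, the same attributes added by this
merge request (`repos_max_capacity` and `files_max_capacity`) can also be set
from a Rails console on the primary (the secondary's database is a read-only
replica); the node lookup below is illustrative:

```ruby
# sudo gitlab-rails console, run on the primary
node = GeoNode.find_by(primary: false)  # pick the secondary you want to tune
node.update!(repos_max_capacity: 50,    # repository backfill concurrency
             files_max_capacity: 20)    # LFS/attachment backfill concurrency
```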
require 'spec_helper'
describe GeoNode, type: :model do
using RSpec::Parameterized::TableSyntax
include ::EE::GeoHelpers
let(:new_node) { create(:geo_node, schema: 'https', host: 'localhost', port: 3000, relative_url_root: 'gitlab') }
......@@ -29,28 +30,22 @@ describe GeoNode, type: :model do
context 'default values' do
let(:gitlab_host) { 'gitlabhost' }
before do
allow(Gitlab.config.gitlab).to receive(:host) { gitlab_host }
end
it 'defines a default schema' do
expect(empty_node.schema).to eq('http')
end
it 'defines a default host' do
expect(empty_node.host).to eq(gitlab_host)
end
it 'defines a default port' do
expect(empty_node.port).to eq(80)
where(:attribute, :value) do
:schema | 'http'
:host | 'gitlabhost'
:port | 80
:relative_url_root | ''
:primary | false
:repos_max_capacity | 25
:files_max_capacity | 10
end
it 'defines a default relative_url_root' do
expect(empty_node.relative_url_root).to eq('')
with_them do
before do
allow(Gitlab.config.gitlab).to receive(:host) { gitlab_host }
end
it 'defines a default primary flag' do
expect(empty_node.primary).to eq(false)
it { expect(empty_node[attribute]).to eq(value) }
end
end
......
......@@ -61,7 +61,7 @@ describe Geo::FileDownloadDispatchWorker, :postgresql do
# 2. We send 2, wait for 1 to finish, and then send again.
it 'attempts to load a new batch without pending downloads' do
stub_const('Geo::BaseSchedulerWorker::DB_RETRIEVE_BATCH_SIZE', 5)
stub_const('Geo::BaseSchedulerWorker::MAX_CAPACITY', 2)
secondary.update!(files_max_capacity: 2)
allow_any_instance_of(::Gitlab::Geo::Transfer).to receive(:download_from_primary).and_return(100)
avatar = fixture_file_upload(Rails.root.join('spec/fixtures/dk.png'))
......
......@@ -102,7 +102,7 @@ describe Geo::RepositorySyncWorker, :postgresql do
before do
allow_any_instance_of(described_class).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
allow_any_instance_of(described_class).to receive(:max_capacity).and_return(3) # Must be more than db_retrieve_batch_size
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
allow_any_instance_of(Project).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
allow_any_instance_of(Geo::ProjectSyncWorker).to receive(:sync_wiki?).and_return(false)
allow_any_instance_of(Geo::RepositorySyncService).to receive(:expire_repository_caches)
......