Commit f1c14522 authored by Robert Speicher's avatar Robert Speicher

Merge branch '2224-selective-replication' into 'master'

Geo - Selectively choose which projects to replicate in DR

Closes #2224

See merge request !2533
parents 8e9676b1 266a8421
......@@ -57,6 +57,7 @@ import UserCallout from './user_callout';
import ShortcutsWiki from './shortcuts_wiki';
import Pipelines from './pipelines';
import BlobViewer from './blob/viewer/index';
import GeoNodeForm from './geo/geo_node_form';
import GeoNodes from './geo_nodes';
import AutoWidthDropdownSelect from './issuable/auto_width_dropdown_select';
import UsersSelect from './users_select';
......@@ -611,6 +612,7 @@ import initGroupAnalytics from './init_group_analytics';
break;
case 'geo_nodes':
new GeoNodes($('.geo-nodes'));
new GeoNodeForm($('.js-geo-node-form'));
break;
}
break;
......
export default class GeoNodeForm {
constructor(container) {
this.$container = container;
this.$namespaces = this.$container.find('.js-namespaces');
this.$namespacesSelect = this.$namespaces.find('.select2');
this.$primaryCheckbox = this.$container.find("input[type='checkbox']");
this.$primaryCheckbox.on('change', () => this.onPrimaryCheckboxChange());
}
onPrimaryCheckboxChange() {
this.$namespacesSelect.select2('data', null);
this.$namespaces.toggleClass('hidden', this.$primaryCheckbox.is(':checked'));
}
}
......@@ -13,5 +13,20 @@ module Geo
belongs_to :repository_renamed_event,
class_name: 'Geo::RepositoryRenamedEvent',
foreign_key: :repository_renamed_event_id
belongs_to :repositories_changed_event,
class_name: 'Geo::RepositoriesChangedEvent',
foreign_key: :repositories_changed_event_id
def event
repository_updated_event ||
repository_deleted_event ||
repository_renamed_event ||
repositories_changed_event
end
def project_id
event.try(:project_id)
end
end
end
module Geo
class RepositoriesChangedEvent < ActiveRecord::Base
include Geo::Model
belongs_to :geo_node
validates :geo_node, presence: true
end
end
......@@ -5,6 +5,9 @@ class GeoNode < ActiveRecord::Base
belongs_to :oauth_application, class_name: 'Doorkeeper::Application', dependent: :destroy # rubocop: disable Cop/ActiveRecordDependent
belongs_to :system_hook, dependent: :destroy # rubocop: disable Cop/ActiveRecordDependent
has_many :geo_node_namespace_links
has_many :namespaces, through: :geo_node_namespace_links
default_values schema: lambda { Gitlab.config.gitlab.protocol },
host: lambda { Gitlab.config.gitlab.host },
port: lambda { Gitlab.config.gitlab.port },
......@@ -105,6 +108,53 @@ class GeoNode < ActiveRecord::Base
end
end
def restricted_project_ids
return unless namespaces.presence
relations = namespaces.map { |namespace| namespace.all_projects.select(:id) }
Project.unscoped
.from("(#{Gitlab::SQL::Union.new(relations).to_sql}) #{Project.table_name}")
.pluck(:id)
end
def lfs_objects
if restricted_project_ids
LfsObject.joins(:projects).where(projects: { id: restricted_project_ids })
else
LfsObject.all
end
end
def projects
if restricted_project_ids
Project.where(id: restricted_project_ids)
else
Project.all
end
end
def project_registries
if restricted_project_ids
Geo::ProjectRegistry.where(project_id: restricted_project_ids)
else
Geo::ProjectRegistry.all
end
end
def uploads
if restricted_project_ids
uploads_table = Upload.arel_table
group_uploads = uploads_table[:model_type].eq('Namespace').and(uploads_table[:model_id].in(Gitlab::Geo.current_node.namespace_ids))
project_uploads = uploads_table[:model_type].eq('Project').and(uploads_table[:model_id].in(restricted_project_ids))
other_uploads = uploads_table[:model_type].not_in(%w[Namespace Project])
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
end
private
def geo_api_url(suffix)
......
class GeoNodeNamespaceLink < ActiveRecord::Base
belongs_to :geo_node, inverse_of: :namespaces
belongs_to :namespace
validates :namespace_id, presence: true, uniqueness: { scope: [:geo_node_id] }
end
......@@ -15,7 +15,7 @@ class GeoNodeStatus
end
def repositories_count
@repositories_count ||= Project.count
@repositories_count ||= repositories.count
end
def repositories_count=(value)
......@@ -23,7 +23,7 @@ class GeoNodeStatus
end
def repositories_synced_count
@repositories_synced_count ||= Geo::ProjectRegistry.synced.count
@repositories_synced_count ||= project_registries.synced.count
end
def repositories_synced_count=(value)
......@@ -35,7 +35,7 @@ class GeoNodeStatus
end
def repositories_failed_count
@repositories_failed_count ||= Geo::ProjectRegistry.failed.count
@repositories_failed_count ||= project_registries.failed.count
end
def repositories_failed_count=(value)
......@@ -43,7 +43,7 @@ class GeoNodeStatus
end
def lfs_objects_count
@lfs_objects_count ||= LfsObject.count
@lfs_objects_count ||= lfs_objects.count
end
def lfs_objects_count=(value)
......@@ -51,7 +51,15 @@ class GeoNodeStatus
end
def lfs_objects_synced_count
@lfs_objects_synced_count ||= Geo::FileRegistry.where(file_type: :lfs).count
@lfs_objects_synced_count ||= begin
relation = Geo::FileRegistry.where(file_type: :lfs)
if Gitlab::Geo.current_node.restricted_project_ids
relation = relation.where(file_id: lfs_objects.pluck(:id))
end
relation.count
end
end
def lfs_objects_synced_count=(value)
......@@ -63,7 +71,7 @@ class GeoNodeStatus
end
def attachments_count
@attachments_count ||= Upload.count
@attachments_count ||= attachments.count
end
def attachments_count=(value)
......@@ -72,7 +80,7 @@ class GeoNodeStatus
def attachments_synced_count
@attachments_synced_count ||= begin
upload_ids = Upload.pluck(:id)
upload_ids = attachments.pluck(:id)
synced_ids = Geo::FileRegistry.where(file_type: [:attachment, :avatar, :file]).pluck(:file_id)
(synced_ids & upload_ids).length
......@@ -94,4 +102,20 @@ class GeoNodeStatus
(synced.to_f / total.to_f) * 100.0
end
def attachments
@attachments ||= Gitlab::Geo.current_node.uploads
end
def lfs_objects
@lfs_objects ||= Gitlab::Geo.current_node.lfs_objects
end
def project_registries
@project_registries ||= Gitlab::Geo.current_node.project_registries
end
def repositories
@repositories ||= Gitlab::Geo.current_node.projects
end
end
......@@ -44,12 +44,18 @@ module Geo
end
def log_error(message, error)
Gitlab::Geo::Logger.error(
Gitlab::Geo::Logger.error({
class: self.class.name,
message: message,
error: error,
error: error
}.merge(log_params))
end
def log_params
{
project_id: project.id,
project_path: project.full_path)
project_path: project.full_path
}
end
end
end
module Geo
class NodeUpdateService
attr_reader :geo_node, :old_namespace_ids, :params
def initialize(geo_node, params)
@geo_node = geo_node
@old_namespace_ids = geo_node.namespace_ids
@params = params.slice(:url, :primary, :namespace_ids)
end
def execute
return false unless geo_node.update(params)
if geo_node.secondary? && namespaces_changed?(geo_node)
Geo::RepositoriesChangedEventStore.new(geo_node).create
end
geo_node
end
private
def namespaces_changed?(geo_node)
geo_node.namespace_ids.any? && geo_node.namespace_ids != old_namespace_ids
end
end
end
module Geo
class RepositoriesChangedEventStore < EventStore
self.event_type = :repositories_changed_event
attr_reader :geo_node
def initialize(geo_node)
@geo_node = geo_node
end
private
def build_event
Geo::RepositoriesChangedEvent.new(geo_node: geo_node)
end
def log_params
{
geo_node_id: geo_node.id,
geo_node_url: geo_node.url
}
end
end
end
......@@ -20,4 +20,11 @@
- unless disable_key_edit
%p.help-block
Paste a machine public key here for the GitLab user this node runs on. Read more about how to generate it
= link_to "here", help_page_path("ssh/README")
= link_to 'here', help_page_path('ssh/README')
.form-group.js-namespaces{ class: ('hidden' unless geo_node.new_record? || geo_node.secondary?) }
= form.label :namespace_ids, 'Namespaces to replicate', class: 'control-label'
.col-sm-10
= form.select :namespace_ids, namespaces_options(geo_node.namespace_ids), { include_hidden: true }, multiple: true, class: 'select2 select-wide', data: { field: 'namespace_ids' }
.help-block
Choose which namespaces you wish to replicate to this secondary node. Leave blank to replicate all.
......@@ -2,7 +2,7 @@
%h3.page-title
Edit Geo Node
= form_for [:admin, @node], html: { class: 'form-horizontal' } do |f|
= form_for [:admin, @node], html: { class: 'form-horizontal js-geo-node-form' } do |f|
= render partial: 'form', locals: { form: f, geo_node: @node, disable_key_edit: true }
.form-actions
= f.submit 'Save changes', class: 'btn btn-create'
......
......@@ -9,7 +9,7 @@
%hr
- if Gitlab::Geo.license_allows?
= form_for [:admin, @node], as: :geo_node, url: admin_geo_nodes_path, html: { class: 'form-horizontal' } do |f|
= form_for [:admin, @node], as: :geo_node, url: admin_geo_nodes_path, html: { class: 'form-horizontal js-geo-node-form' } do |f|
= render partial: 'form', locals: { form: f, geo_node: @node }
.form-actions
= f.submit 'Add Node', class: 'btn btn-create'
......@@ -33,6 +33,12 @@
%span.help-block Primary node
- else
= status_loading_icon
- if node.restricted_project_ids
%p
%span.help-block
Namespaces to replicate:
%strong.node-info
= node_selected_namespaces_to_replicate(node)
.js-geo-node-status{ style: 'display: none' }
- if node.enabled?
%p
......
......@@ -151,6 +151,10 @@ module Geo
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
def current_node
Gitlab::Geo.current_node
end
def node_enabled?
# Only check every minute to avoid polling the DB excessively
unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago
......
......@@ -9,30 +9,32 @@ module Geo
end
def load_pending_resources
lfs_object_ids = find_lfs_object_ids(db_retrieve_batch_size)
objects_ids = find_object_ids(db_retrieve_batch_size)
lfs_object_ids = find_lfs_object_ids
objects_ids = find_object_ids
interleave(lfs_object_ids, objects_ids)
end
def find_object_ids(limit)
def find_object_ids
downloaded_ids = find_downloaded_ids([:attachment, :avatar, :file])
Upload.where.not(id: downloaded_ids)
.order(created_at: :desc)
.limit(limit)
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').downcase] }
current_node.uploads
.where.not(id: downloaded_ids)
.order(created_at: :desc)
.limit(db_retrieve_batch_size)
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').downcase] }
end
def find_lfs_object_ids(limit)
def find_lfs_object_ids
downloaded_ids = find_downloaded_ids([:lfs])
LfsObject.where.not(id: downloaded_ids)
.order(created_at: :desc)
.limit(limit)
.pluck(:id)
.map { |id| [id, :lfs] }
current_node.lfs_objects
.where.not(id: downloaded_ids)
.order(created_at: :desc)
.limit(db_retrieve_batch_size)
.pluck(:id)
.map { |id| [id, :lfs] }
end
def find_downloaded_ids(file_types)
......
module Geo
class RepositoriesCleanUpWorker
include Sidekiq::Worker
include GeoQueue
BATCH_SIZE = 250
LEASE_TIMEOUT = 60.minutes
def perform(geo_node_id)
# Prevent multiple Sidekiq workers from performing repositories clean up
try_obtain_lease do
geo_node = GeoNode.find(geo_node_id)
restricted_project_ids = geo_node.restricted_project_ids
return unless restricted_project_ids
Project.where.not(id: restricted_project_ids).find_in_batches(batch_size: BATCH_SIZE) do |batch|
batch.each do |project|
clean_up_repositories(project)
end
end
end
rescue ActiveRecord::RecordNotFound => e
log_error('Could not find Geo node, skipping repositories clean up', geo_node_id: geo_node_id, error: e)
end
private
def clean_up_repositories(project)
job_id = ::GeoRepositoryDestroyWorker.perform_async(project.id, project.name, project.full_path)
if job_id
log_info('Repository cleaned up', project_id: project.id, full_path: project.full_path, job_id: job_id)
else
log_error('Could not clean up repository', project_id: project.id, full_path: project.full_path)
end
end
def try_obtain_lease
lease = exclusive_lease.try_obtain
unless lease
log_error('Cannot obtain an exclusive lease. There must be another worker already in execution.')
return
end
begin
yield lease
ensure
release_lease(lease)
end
end
def lease_key
@lease_key ||= self.class.name.underscore
end
def exclusive_lease
@lease ||= Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
end
def release_lease(uuid)
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
def log_info(message, params = {})
Gitlab::Geo::Logger.info({ class: self.class.name, message: message }.merge(params))
end
def log_error(message, params = {})
Gitlab::Geo::Logger.error({ class: self.class.name, message: message }.merge(params))
end
end
end
......@@ -23,17 +23,19 @@ module Geo
end
def find_project_ids_not_synced
Project.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id))
.order(last_repository_updated_at: :desc)
.limit(db_retrieve_batch_size)
.pluck(:id)
current_node.projects
.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id))
.order(last_repository_updated_at: :desc)
.limit(db_retrieve_batch_size)
.pluck(:id)
end
def find_project_ids_updated_recently
Geo::ProjectRegistry.dirty
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(db_retrieve_batch_size)
.pluck(:project_id)
current_node.project_registries
.dirty
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(db_retrieve_batch_size)
.pluck(:project_id)
end
end
end
---
title: Geo - Selectively choose which namespaces to replicate in DR
merge_request: 2533
author:
class CreateGeoNodeNamespaceLinks < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def change
create_table :geo_node_namespace_links do |t|
t.references :geo_node, index: true, foreign_key: { on_delete: :cascade }, null: false
t.references :namespace, foreign_key: { on_delete: :cascade }, null: false
t.index [:geo_node_id, :namespace_id], unique: true
end
add_timestamps_with_timezone :geo_node_namespace_links
end
end
class GeoRepositoriesChangedEvents < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
create_table :geo_repositories_changed_events, id: :bigserial do |t|
t.references :geo_node, index: true, foreign_key: { on_delete: :cascade }, null: false
end
add_column :geo_event_log, :repositories_changed_event_id, :integer, limit: 8
end
end
class AddGeoRepositoriesChangedEventsForeignKey < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_foreign_key :geo_event_log, :geo_repositories_changed_events,
column: :repositories_changed_event_id, on_delete: :cascade
end
def down
remove_foreign_key :geo_event_log, column: :repositories_changed_event_id
end
end
......@@ -629,10 +629,21 @@ ActiveRecord::Schema.define(version: 20170803130232) do
t.integer "repository_updated_event_id", limit: 8
t.integer "repository_deleted_event_id", limit: 8
t.integer "repository_renamed_event_id", limit: 8
t.integer "repositories_changed_event_id", limit: 8
end
add_index "geo_event_log", ["repository_updated_event_id"], name: "index_geo_event_log_on_repository_updated_event_id", using: :btree
create_table "geo_node_namespace_links", force: :cascade do |t|
t.integer "geo_node_id", null: false
t.integer "namespace_id", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
add_index "geo_node_namespace_links", ["geo_node_id", "namespace_id"], name: "index_geo_node_namespace_links_on_geo_node_id_and_namespace_id", unique: true, using: :btree
add_index "geo_node_namespace_links", ["geo_node_id"], name: "index_geo_node_namespace_links_on_geo_node_id", using: :btree
create_table "geo_nodes", force: :cascade do |t|
t.string "schema"
t.string "host"
......@@ -653,6 +664,12 @@ ActiveRecord::Schema.define(version: 20170803130232) do
add_index "geo_nodes", ["host"], name: "index_geo_nodes_on_host", using: :btree
add_index "geo_nodes", ["primary"], name: "index_geo_nodes_on_primary", using: :btree
create_table "geo_repositories_changed_events", id: :bigserial, force: :cascade do |t|
t.integer "geo_node_id", null: false
end
add_index "geo_repositories_changed_events", ["geo_node_id"], name: "index_geo_repositories_changed_events_on_geo_node_id", using: :btree
create_table "geo_repository_deleted_events", id: :bigserial, force: :cascade do |t|
t.integer "project_id", null: false
t.text "repository_storage_name", null: false
......@@ -1971,9 +1988,13 @@ ActiveRecord::Schema.define(version: 20170803130232) do
add_foreign_key "environments", "projects", name: "fk_d1c8c1da6a", on_delete: :cascade
add_foreign_key "events", "projects", name: "fk_0434b48643", on_delete: :cascade
add_foreign_key "forked_project_links", "projects", column: "forked_to_project_id", name: "fk_434510edb0", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repositories_changed_events", column: "repositories_changed_event_id", name: "fk_4a99ebfd60", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repository_deleted_events", column: "repository_deleted_event_id", name: "fk_c4b1c1f66e", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repository_renamed_events", column: "repository_renamed_event_id", name: "fk_86c84214ec", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repository_updated_events", column: "repository_updated_event_id", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "namespaces", on_delete: :cascade
add_foreign_key "geo_repositories_changed_events", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_repository_renamed_events", "projects", on_delete: :cascade
add_foreign_key "geo_repository_updated_events", "projects", on_delete: :cascade
add_foreign_key "index_statuses", "projects", name: "fk_74b2492545", on_delete: :cascade
......
......@@ -25,7 +25,7 @@ class Admin::GeoNodesController < Admin::ApplicationController
end
def update
if @node.update_attributes(geo_node_params.except(:geo_node_key_attributes))
if Geo::NodeUpdateService.new(@node, geo_node_params).execute
redirect_to admin_geo_nodes_path, notice: 'Geo Node was successfully updated.'
else
render 'edit'
......@@ -79,7 +79,7 @@ class Admin::GeoNodesController < Admin::ApplicationController
private
def geo_node_params
params.require(:geo_node).permit(:url, :primary, geo_node_key_attributes: [:key])
params.require(:geo_node).permit(:url, :primary, namespace_ids: [], geo_node_key_attributes: [:key])
end
def check_license
......
module EE
module GeoHelper
def node_selected_namespaces_to_replicate(node)
node.namespaces.map(&:human_name).sort.join(', ')
end
def node_status_icon(node)
unless node.primary?
status = node.enabled? ? 'unknown' : 'disabled'
......
......@@ -38,14 +38,15 @@ module Gitlab
# Projects without corresponding ProjectRegistry in the DR database
# See: https://robots.thoughtbot.com/postgres-foreign-data-wrapper (requires PG 9.6)
$stdout.print 'Searching for non replicated projects...'
Project.select(:id).find_in_batches(batch_size: BATCH_SIZE) do |batch|
Gitlab::Geo.current_node.projects.select(:id).find_in_batches(batch_size: BATCH_SIZE) do |batch|
$stdout.print '.'
project_ids = batch.map(&:id)
existing = ::Geo::ProjectRegistry.where(project_id: project_ids).pluck(:project_id)
missing_projects = project_ids - existing
Gitlab::Geo::Logger.debug(
Gitlab::Geo::Logger.info(
class: self.class.name,
message: "Missing projects",
projects: missing_projects,
......@@ -60,12 +61,15 @@ module Gitlab
end
def handle_events(batch)
batch.each do |event|
# Update repository
if event.repository_updated_event
handle_repository_update(event.repository_updated_event)
elsif event.repository_deleted_event
handle_repository_delete(event.repository_deleted_event)
batch.each do |event_log|
next unless can_replay?(event_log)
if event_log.repository_updated_event
handle_repository_update(event_log.repository_updated_event)
elsif event_log.repository_deleted_event
handle_repository_delete(event_log.repository_deleted_event)
elsif event_log.repositories_changed_event
handle_repositories_changed(event_log.repositories_changed_event)
end
end
end
......@@ -88,6 +92,17 @@ module Gitlab
@exit = true
end
def exit?
@exit
end
def can_replay?(event_log)
return true if event_log.project_id.nil?
return true if Gitlab::Geo.current_node.restricted_project_ids.nil?
Gitlab::Geo.current_node.restricted_project_ids.include?(event_log.project_id)
end
def handle_repository_update(updated_event)
registry = ::Geo::ProjectRegistry.find_or_initialize_by(project_id: updated_event.project_id)
......@@ -129,8 +144,24 @@ module Gitlab
::Geo::ProjectRegistry.where(project_id: deleted_event.project_id).delete_all
end
def exit?
@exit
def handle_repositories_changed(changed_event)
return unless Gitlab::Geo.current_node.id == changed_event.geo_node_id
job_id = ::Geo::RepositoriesCleanUpWorker.perform_in(1.hour, changed_event.geo_node_id)
if job_id
log_info('Scheduled repositories clean up for Geo node', geo_node_id: changed_event.geo_node_id, job_id: job_id)
else
log_error('Could not schedule repositories clean up for Geo node', geo_node_id: changed_event.geo_node_id)
end
end
def log_info(message, params = {})
Gitlab::Geo::Logger.info({ class: self.class.name, message: message }.merge(params))
end
def log_error(message, params = {})
Gitlab::Geo::Logger.error({ class: self.class.name, message: message }.merge(params))
end
end
end
......
......@@ -132,15 +132,21 @@ describe Admin::GeoNodesController, :postgresql do
context 'with add-on license' do
before do
allow(Gitlab::Geo).to receive(:license_allows?).and_return(true)
go
end
it 'updates the node without changing the key' do
geo_node.reload
go
geo_node.reload
expect(geo_node.url.chomp('/')).to eq(geo_node_attributes[:url])
expect(geo_node.geo_node_key.fingerprint).to eq(original_fingerprint)
end
it 'delegates the update of the Geo node to Geo::NodeUpdateService' do
expect_any_instance_of(Geo::NodeUpdateService).to receive(:execute).once
go
end
end
end
......@@ -260,7 +266,8 @@ describe Admin::GeoNodesController, :postgresql do
lfs_objects_count: 256,
lfs_objects_synced_count: 123,
repositories_count: 10,
repositories_synced_count: 5
repositories_synced_count: 5,
repositories_failed_count: 0
)
end
......
FactoryGirl.define do
factory :geo_event_log, class: Geo::EventLog do
trait :updated_event do
repository_updated_event factory: :geo_repository_update_event
repository_updated_event factory: :geo_repository_updated_event
end
trait :deleted_event do
repository_deleted_event factory: :geo_repository_delete_event
repository_deleted_event factory: :geo_repository_deleted_event
end
end
factory :geo_repository_update_event, class: Geo::RepositoryUpdatedEvent do
factory :geo_repository_updated_event, class: Geo::RepositoryUpdatedEvent do
source 0
branches_affected 0
tags_affected 0
project
end
factory :geo_repository_delete_event, class: Geo::RepositoryDeletedEvent do
factory :geo_repository_deleted_event, class: Geo::RepositoryDeletedEvent do
project
repository_storage_name { project.repository_storage }
......@@ -24,4 +24,21 @@ FactoryGirl.define do
deleted_path { project.path_with_namespace }
deleted_project_name { project.name }
end
factory :geo_repository_renamed_event, class: Geo::RepositoryRenamedEvent do
project
repository_storage_name { project.repository_storage }
repository_storage_path { project.repository_storage_path }
old_path_with_namespace { project.full_path }
new_path_with_namespace { project.full_path }
old_wiki_path_with_namespace { project.wiki.path_with_namespace }
new_wiki_path_with_namespace { project.wiki.path_with_namespace }
old_path { project.path }
new_path { project.path }
end
factory :geo_repositories_changed_event, class: Geo::RepositoriesChangedEvent do
geo_node
end
end
FactoryGirl.define do
factory :geo_file_registry, class: Geo::FileRegistry do
sequence(:file_id)
file_type :file
trait :avatar do
file_type :avatar
end
trait :lfs do
file_type :lfs
end
end
end
FactoryGirl.define do
factory :geo_node_namespace_link do
geo_node
namespace
end
end
......@@ -2,6 +2,8 @@ require 'spec_helper'
describe Gitlab::Geo::LogCursor::Daemon do
describe '#run!' do
let!(:geo_node) { create(:geo_node, :current) }
it 'traps signals' do
allow(subject).to receive(:exit?) { true }
expect(subject).to receive(:trap_signals)
......@@ -21,7 +23,7 @@ describe Gitlab::Geo::LogCursor::Daemon do
end
end
context 'when processing a repository updated event' do
context 'when replaying a repository updated event' do
let(:event_log) { create(:geo_event_log, :updated_event) }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
let(:repository_updated_event) { event_log.repository_updated_event }
......@@ -53,7 +55,7 @@ describe Gitlab::Geo::LogCursor::Daemon do
end
end
context 'when processing a repository deleted event' do
context 'when replaying a repository deleted event' do
let(:event_log) { create(:geo_event_log, :deleted_event) }
let(:project) { event_log.repository_deleted_event.project }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
......@@ -79,5 +81,74 @@ describe Gitlab::Geo::LogCursor::Daemon do
subject.run!
end
end
context 'when replaying a repositories changed event' do
let(:geo_node) { create(:geo_node) }
let(:repositories_changed_event) { create(:geo_repositories_changed_event, geo_node: geo_node) }
let(:event_log) { create(:geo_event_log, repositories_changed_event: repositories_changed_event) }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
before do
allow(subject).to receive(:exit?).and_return(false, true)
end
it 'schedules a GeoRepositoryDestroyWorker when event node is the current node' do
allow(Gitlab::Geo).to receive(:current_node).and_return(geo_node)
expect(Geo::RepositoriesCleanUpWorker).to receive(:perform_in).with(within(5.minutes).of(1.hour), geo_node.id)
subject.run!
end
it 'does not schedule a GeoRepositoryDestroyWorker when event node is not the current node' do
allow(Gitlab::Geo).to receive(:current_node).and_return(build(:geo_node))
expect(Geo::RepositoriesCleanUpWorker).not_to receive(:perform_in)
subject.run!
end
end
context 'when node has namespace restrictions' do
let(:geo_node) { create(:geo_node, :current) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:project) { create(:project, group: group_1) }
let(:repository_updated_event) { create(:geo_repository_updated_event, project: project) }
let(:event_log) { create(:geo_event_log, repository_updated_event: repository_updated_event) }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
before do
allow(subject).to receive(:exit?).and_return(false, true)
end
it 'replays events for projects that belong to selected namespaces to replicate' do
geo_node.update_attribute(:namespaces, [group_1])
expect { subject.run! }.to change(Geo::ProjectRegistry, :count).by(1)
end
it 'does not replay events for projects that do not belong to selected namespaces to replicate' do
geo_node.update_attribute(:namespaces, [group_2])
expect { subject.run! }.not_to change(Geo::ProjectRegistry, :count)
end
context 'when performing a full scan' do
subject { described_class.new(full_scan: true) }
it 'creates registries for missing projects that belong to selected namespaces' do
geo_node.update_attribute(:namespaces, [group_1])
expect { subject.run! }.to change(Geo::ProjectRegistry, :count).by(1)
end
it 'does not create registries for missing projects that do not belong to selected namespaces' do
geo_node.update_attribute(:namespaces, [group_2])
expect { subject.run! }.not_to change(Geo::ProjectRegistry, :count)
end
end
end
end
end
......@@ -3,6 +3,62 @@ require 'spec_helper'
RSpec.describe Geo::EventLog, type: :model do
describe 'relationships' do
it { is_expected.to belong_to(:repository_updated_event).class_name('Geo::RepositoryUpdatedEvent').with_foreign_key('repository_updated_event_id') }
it { is_expected.to belong_to(:repository_deleted_event).class_name('Geo::RepositoryDeletedEvent').with_foreign_key('repository_deleted_event_id') }
it { is_expected.to belong_to(:repository_renamed_event).class_name('Geo::RepositoryRenamedEvent').with_foreign_key('repository_renamed_event_id') }
it { is_expected.to belong_to(:repositories_changed_event).class_name('Geo::RepositoriesChangedEvent').with_foreign_key('repositories_changed_event_id') }
end
describe '#event' do
it 'returns nil when having no event associated' do
expect(subject.event).to be_nil
end
it 'returns repository_updated_event when set' do
repository_updated_event = build(:geo_repository_updated_event)
subject.repository_updated_event = repository_updated_event
expect(subject.event).to eq repository_updated_event
end
it 'returns repository_deleted_event when set' do
repository_deleted_event = build(:geo_repository_deleted_event)
subject.repository_deleted_event = repository_deleted_event
expect(subject.event).to eq repository_deleted_event
end
it 'returns repository_renamed_event when set' do
repository_renamed_event = build(:geo_repository_renamed_event)
subject.repository_renamed_event = repository_renamed_event
expect(subject.event).to eq repository_renamed_event
end
it 'returns repositories_changed_event when set' do
repositories_changed_event = build(:geo_repositories_changed_event)
subject.repositories_changed_event = repositories_changed_event
expect(subject.event).to eq repositories_changed_event
end
end
describe '#project_id' do
it 'returns nil when having no event associated' do
expect(subject.project_id).to be_nil
end
it 'returns nil when an event does not respond to project_id' do
repositories_changed_event = build(:geo_repositories_changed_event)
subject.repositories_changed_event = repositories_changed_event
expect(subject.project_id).to be_nil
end
it 'returns event#project_id when an event respond to project_id' do
repository_updated_event = build(:geo_repository_updated_event)
subject.repository_updated_event = repository_updated_event
expect(subject.project_id).to eq repository_updated_event.project_id
end
end
end
require 'spec_helper'
describe Geo::RepositoriesChangedEvent, type: :model do
describe 'relationships' do
it { is_expected.to belong_to(:geo_node) }
end
describe 'validations' do
it { is_expected.to validate_presence_of(:geo_node) }
end
end
require 'spec_helper'
describe GeoNodeNamespaceLink, models: true do
describe 'relationships' do
it { is_expected.to belong_to(:geo_node) }
it { is_expected.to belong_to(:namespace) }
end
describe 'validations' do
let!(:geo_node_namespace_link) { create(:geo_node_namespace_link) }
it { is_expected.to validate_presence_of(:namespace_id) }
it { is_expected.to validate_uniqueness_of(:namespace_id).scoped_to(:geo_node_id) }
end
end
......@@ -14,6 +14,9 @@ describe GeoNode, type: :model do
context 'associations' do
it { is_expected.to belong_to(:geo_node_key).dependent(:destroy) }
it { is_expected.to belong_to(:oauth_application).dependent(:destroy) }
it { is_expected.to have_many(:geo_node_namespace_links) }
it { is_expected.to have_many(:namespaces).through(:geo_node_namespace_links) }
end
context 'default values' do
......@@ -313,4 +316,27 @@ describe GeoNode, type: :model do
expect(node).to be_missing_oauth_application
end
end
describe '#restricted_project_ids' do
context 'without namespace restriction' do
it 'returns nil' do
expect(node.restricted_project_ids).to be_nil
end
end
context 'with namespace restrictions' do
it 'returns an array with unique project ids that belong to the namespaces' do
group_1 = create(:group)
group_2 = create(:group)
nested_group_1 = create(:group, parent: group_1)
project_1 = create(:project, group: group_1)
project_2 = create(:project, group: nested_group_1)
project_3 = create(:project, group: group_2)
node.update_attribute(:namespaces, [group_1, group_2, nested_group_1])
expect(node.restricted_project_ids).to match_array([project_1.id, project_2.id, project_3.id])
end
end
end
end
require 'spec_helper'
describe GeoNodeStatus do
let!(:geo_node) { create(:geo_node, :current) }
let(:group) { create(:group) }
let!(:project_1) { create(:project, group: group) }
let!(:project_2) { create(:project, group: group) }
let!(:project_3) { create(:project) }
let!(:project_4) { create(:project) }
subject { described_class.new }
describe '#healthy?' do
......@@ -40,7 +47,7 @@ describe GeoNodeStatus do
expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
Geo::FileRegistry.create(file_type: :avatar, file_id: upload.id)
create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new
expect(subject.attachments_count).to eq(1)
......@@ -53,7 +60,7 @@ describe GeoNodeStatus do
expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
Geo::FileRegistry.create(file_type: :avatar, file_id: upload.id)
create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new
expect(subject.attachments_count).to eq(1)
......@@ -62,51 +69,97 @@ describe GeoNodeStatus do
end
describe '#attachments_synced_in_percentage' do
it 'returns 0 when no objects are available' do
subject.attachments_count = 0
subject.attachments_synced_count = 0
let(:avatar) { fixture_file_upload(Rails.root.join('spec/fixtures/dk.png')) }
let(:upload_1) { create(:upload, model: group, path: avatar) }
let(:upload_2) { create(:upload, model: project_1, path: avatar) }
before do
create(:upload, model: create(:group), path: avatar)
create(:upload, model: project_3, path: avatar)
end
it 'returns 0 when no objects are available' do
expect(subject.attachments_synced_in_percentage).to eq(0)
end
it 'returns the right percentage' do
subject.attachments_count = 4
subject.attachments_synced_count = 1
it 'returns the right percentage with no group restrictions' do
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
expect(subject.attachments_synced_in_percentage).to be_within(0.0001).of(50)
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
expect(subject.attachments_synced_in_percentage).to be_within(0.0001).of(25)
expect(subject.attachments_synced_in_percentage).to be_within(0.0001).of(100)
end
end
describe '#lfs_objects_synced_in_percentage' do
it 'returns 0 when no objects are available' do
subject.lfs_objects_count = 0
subject.lfs_objects_synced_count = 0
let(:lfs_object_project) { create(:lfs_objects_project, project: project_1) }
before do
allow(ProjectCacheWorker).to receive(:perform_async).and_return(true)
create(:lfs_objects_project, project: project_1)
create_list(:lfs_objects_project, 2, project: project_3)
end
it 'returns 0 when no objects are available' do
expect(subject.lfs_objects_synced_in_percentage).to eq(0)
end
it 'returns the right percentage' do
subject.lfs_objects_count = 4
subject.lfs_objects_synced_count = 1
it 'returns the right percentage with no group restrictions' do
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25)
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(50)
end
end
describe '#repositories_synced_in_percentage' do
it 'returns 0 when no objects are available' do
subject.repositories_count = 0
subject.repositories_synced_count = 0
describe '#repositories_failed_count' do
before do
create(:geo_project_registry, :sync_failed, project: project_1)
create(:geo_project_registry, :sync_failed, project: project_3)
end
it 'returns the right number of failed repos with no group restrictions' do
expect(subject.repositories_failed_count).to eq(2)
end
it 'returns the right number of failed repos with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
expect(subject.repositories_failed_count).to eq(1)
end
end
describe '#repositories_synced_in_percentage' do
it 'returns 0 when no projects are available' do
expect(subject.repositories_synced_in_percentage).to eq(0)
end
it 'returns the right percentage' do
subject.repositories_count = 4
subject.repositories_synced_count = 1
it 'returns the right percentage with no group restrictions' do
create(:geo_project_registry, :synced, project: project_1)
expect(subject.repositories_synced_in_percentage).to be_within(0.0001).of(25)
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
create(:geo_project_registry, :synced, project: project_1)
expect(subject.repositories_synced_in_percentage).to be_within(0.0001).of(50)
end
end
context 'when no values are available' do
......
......@@ -4,13 +4,14 @@ describe GeoNodeStatusEntity, :postgresql do
let(:geo_node_status) do
GeoNodeStatus.new(
id: 1,
health: nil,
health: '',
attachments_count: 329,
attachments_synced_count: 141,
lfs_objects_count: 256,
lfs_objects_synced_count: 123,
repositories_count: 10,
repositories_synced_count: 5
repositories_synced_count: 5,
repositories_failed_count: 0
)
end
......
require 'spec_helper'
describe Geo::NodeUpdateService do
let(:group) { create(:group) }
let!(:primary) { create(:geo_node, :primary, :current) }
let(:geo_node) { create(:geo_node) }
let(:geo_node_with_restrictions) { create(:geo_node, namespace_ids: [group.id]) }
describe '#execute' do
it 'updates the node without changing the key' do
original_fingerprint = geo_node.geo_node_key.fingerprint
params = { url: 'http://example.com', geo_node_key_attributes: attributes_for(:key) }
service = described_class.new(geo_node, params)
service.execute
geo_node.reload
expect(geo_node.url.chomp('/')).to eq(params[:url])
expect(geo_node.geo_node_key.fingerprint).to eq(original_fingerprint)
end
it 'returns false when update fails' do
allow(geo_node).to receive(:update).and_return(false)
service = described_class.new(geo_node, { url: 'http://example.com' })
expect(service.execute).to eq false
end
it 'logs an event to the Geo event log when namespaces change' do
service = described_class.new(geo_node, namespace_ids: [group.id])
expect { service.execute }.to change(Geo::RepositoriesChangedEvent, :count).by(1)
end
it 'does not log an event to the Geo event log when removing namespace restrictions' do
service = described_class.new(geo_node_with_restrictions, namespace_ids: [])
expect { service.execute }.not_to change(Geo::RepositoriesChangedEvent, :count)
end
it 'does not log an event to the Geo event log when node is a primary node' do
service = described_class.new(primary, namespace_ids: [group.id])
expect { service.execute }.not_to change(Geo::RepositoriesChangedEvent, :count)
end
end
end
require 'spec_helper'
describe Geo::RepositoriesChangedEventStore do
let(:geo_node) { create(:geo_node) }
subject { described_class.new(geo_node) }
describe '#create' do
it 'does not create an event when not running on a primary node' do
allow(Gitlab::Geo).to receive(:primary?) { false }
expect { subject.create }.not_to change(Geo::RepositoriesChangedEvent, :count)
end
it 'creates a repositories changed event when running on a primary node' do
allow(Gitlab::Geo).to receive(:primary?) { true }
expect { subject.create }.to change(Geo::RepositoriesChangedEvent, :count).by(1)
end
end
end
require 'spec_helper'
describe Geo::FileDownloadDispatchWorker do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:secondary) { create(:geo_node, :current) }
before do
@primary = create(:geo_node, :primary, host: 'primary-geo-node')
@secondary = create(:geo_node, :current)
allow(Gitlab::Geo).to receive(:secondary?).and_return(true)
allow_any_instance_of(Gitlab::ExclusiveLease)
.to receive(:try_obtain).and_return(true)
......@@ -28,8 +29,8 @@ describe Geo::FileDownloadDispatchWorker do
it 'does not schedule anything when node is disabled' do
create(:lfs_object, :with_file)
@secondary.enabled = false
@secondary.save
secondary.enabled = false
secondary.save
expect(GeoFileDownloadWorker).not_to receive(:perform_async)
......@@ -73,5 +74,44 @@ describe Geo::FileDownloadDispatchWorker do
subject.perform
end
end
context 'when node has namespace restrictions' do
let(:synced_group) { create(:group) }
let!(:project_in_synced_group) { create(:project, group: synced_group) }
let!(:unsynced_project) { create(:project) }
before do
allow(ProjectCacheWorker).to receive(:perform_async).and_return(true)
allow_any_instance_of(described_class).to receive(:over_time?).and_return(false)
secondary.update_attribute(:namespaces, [synced_group])
end
it 'does not perform GeoFileDownloadWorker for LFS object that does not belong to selected namespaces to replicate' do
lfs_objec_in_synced_group = create(:lfs_objects_project, project: project_in_synced_group)
create(:lfs_objects_project, project: unsynced_project)
expect(GeoFileDownloadWorker).to receive(:perform_async)
.with(:lfs, lfs_objec_in_synced_group.lfs_object_id).once.and_return(spy)
subject.perform
end
it 'does not perform GeoFileDownloadWorker for upload objects that do not belong to selected namespaces to replicate' do
avatar = fixture_file_upload(Rails.root.join('spec/fixtures/dk.png'))
avatar_in_synced_group = create(:upload, model: synced_group, path: avatar)
create(:upload, model: create(:group), path: avatar)
avatar_in_project_in_synced_group = create(:upload, model: project_in_synced_group, path: avatar)
create(:upload, model: unsynced_project, path: avatar)
expect(GeoFileDownloadWorker).to receive(:perform_async)
.with('avatar', avatar_in_project_in_synced_group.id).once.and_return(spy)
expect(GeoFileDownloadWorker).to receive(:perform_async)
.with('avatar', avatar_in_synced_group.id).once.and_return(spy)
subject.perform
end
end
end
end
require 'spec_helper'
describe Geo::RepositoriesCleanUpWorker do
let!(:geo_node) { create(:geo_node) }
let(:synced_group) { create(:group) }
let!(:project_in_synced_group) { create(:project, group: synced_group) }
let!(:unsynced_project) { create(:project) }
describe '#perform' do
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { true }
end
context 'when node has namespace restrictions' do
it 'performs GeoRepositoryDestroyWorker for each project that does not belong to selected namespaces to replicate' do
geo_node.update_attribute(:namespaces, [synced_group])
expect(GeoRepositoryDestroyWorker).to receive(:perform_async)
.with(unsynced_project.id, unsynced_project.name, unsynced_project.full_path)
.once.and_return(1)
subject.perform(geo_node.id)
end
end
context 'when does not node have namespace restrictions' do
it 'does not perform GeoRepositoryDestroyWorker' do
expect(GeoRepositoryDestroyWorker).not_to receive(:perform_async)
subject.perform(geo_node.id)
end
end
context 'when cannnot obtain a lease' do
it 'does not perform GeoRepositoryDestroyWorker' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false }
expect(GeoRepositoryDestroyWorker).not_to receive(:perform_async)
subject.perform(geo_node.id)
end
end
context 'when Geo node could not be found' do
it 'does not raise an error' do
expect { subject.perform(-1) }.not_to raise_error
end
end
end
end
require 'spec_helper'
describe Geo::RepositorySyncWorker do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:secondary) { create(:geo_node, :current) }
let!(:project_1) { create(:project) }
let!(:project_2) { create(:project) }
let(:synced_group) { create(:group) }
let!(:project_in_synced_group) { create(:project, group: synced_group) }
let!(:unsynced_project) { create(:project) }
subject { described_class.new }
......@@ -21,8 +22,8 @@ describe Geo::RepositorySyncWorker do
end
it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do
create(:geo_project_registry, :sync_failed, project: project_1)
create(:geo_project_registry, :synced, project: project_2)
create(:geo_project_registry, :sync_failed, project: project_in_synced_group)
create(:geo_project_registry, :synced, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_in).once.and_return(spy)
......@@ -30,8 +31,8 @@ describe Geo::RepositorySyncWorker do
end
it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
create(:geo_project_registry, :synced, project: project_2)
create(:geo_project_registry, :synced, :repository_dirty, project: project_in_synced_group)
create(:geo_project_registry, :synced, project: unsynced_project)
create(:geo_project_registry, :synced, :wiki_dirty)
expect(Geo::ProjectSyncWorker).to receive(:perform_in).twice.and_return(spy)
......@@ -62,5 +63,32 @@ describe Geo::RepositorySyncWorker do
subject.perform
end
context 'when node has namespace restrictions' do
before do
secondary.update_attribute(:namespaces, [synced_group])
end
it 'does not perform Geo::ProjectSyncWorker for projects that do not belong to selected namespaces to replicate' do
expect(Geo::ProjectSyncWorker).to receive(:perform_in)
.with(300, project_in_synced_group.id, within(1.minute).of(Time.now))
.once
.and_return(spy)
subject.perform
end
it 'does not perform Geo::ProjectSyncWorker for synced projects updated recently that do not belong to selected namespaces to replicate' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_in_synced_group)
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_in)
.with(300, project_in_synced_group.id, within(1.minute).of(Time.now))
.once
.and_return(spy)
subject.perform
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment