Commit a2ab79f9 authored by Dmitriy Zaporozhets

Merge remote-tracking branch 'ee-com/master' into 4249-show-results-from-docker-image-scan-in-the-merge-request-widget
parents df27f7bd a24d2842
@@ -2,6 +2,8 @@ module Geo
module Fdw
class LfsObject < ::Geo::BaseFdw
self.table_name = Gitlab::Geo.fdw_table('lfs_objects')
scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::LOCAL_STORE]) }
end
end
end
@@ -12,8 +12,16 @@ module Geo
{ id: object_db_id, type: object_type, job_id: job_id } if job_id
end
def finder
@finder ||= FileRegistryFinder.new(current_node: current_node)
def attachments_finder
@attachments_finder ||= AttachmentRegistryFinder.new(current_node: current_node)
end
def file_registry_finder
@file_registry_finder ||= FileRegistryFinder.new(current_node: current_node)
end
def lfs_objects_finder
@lfs_objects_finder ||= LfsObjectRegistryFinder.new(current_node: current_node)
end
# Polls for new resources to be transferred
@@ -26,19 +34,36 @@ module Geo
if remaining_capacity.zero?
resources
else
resources + finder.find_failed_objects(batch_size: remaining_capacity)
resources + find_failed_upload_object_ids(batch_size: remaining_capacity)
end
end
def find_unsynced_objects(batch_size:)
lfs_object_ids = finder.find_nonreplicated_lfs_objects(batch_size: batch_size, except_registry_ids: scheduled_file_ids(:lfs))
upload_objects_ids = finder.find_nonreplicated_uploads(batch_size: batch_size, except_registry_ids: scheduled_file_ids(Geo::FileService::DEFAULT_OBJECT_TYPES))
lfs_object_ids = find_unsynced_lfs_objects_ids(batch_size: batch_size)
attachment_ids = find_unsynced_attachments_ids(batch_size: batch_size)
interleave(lfs_object_ids, attachment_ids)
end
def find_unsynced_lfs_objects_ids(batch_size:)
lfs_objects_finder.find_unsynced_lfs_objects(batch_size: batch_size, except_registry_ids: scheduled_file_ids(:lfs))
.pluck(:id)
.map { |id| [id, :lfs] }
end
def find_unsynced_attachments_ids(batch_size:)
attachments_finder.find_unsynced_attachments(batch_size: batch_size, except_registry_ids: scheduled_file_ids(Geo::FileService::DEFAULT_OBJECT_TYPES))
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
end
interleave(lfs_object_ids, upload_objects_ids)
def find_failed_upload_object_ids(batch_size:)
file_registry_finder.find_failed_file_registries(batch_size: batch_size)
.pluck(:file_id, :file_type)
end
def scheduled_file_ids(file_types)
file_types = Array(file_types) unless file_types.is_a? Array
file_types = Array(file_types)
scheduled_jobs.select { |data| file_types.include?(data[:type]) }.map { |data| data[:id] }
end
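For orientation, here is a minimal standalone sketch of the data shape these methods hand to `interleave`: each finder yields `[id, type]` pairs, and the worker merges the two lists so both LFS objects and attachments get scheduled. The merge below is a naive round-robin stand-in, not GitLab's actual `interleave` implementation, and the IDs and types are placeholders.

```ruby
# Hypothetical data only; the real IDs come from the finders above.
lfs_object_ids = [[1, :lfs], [2, :lfs]]                # from find_unsynced_lfs_objects_ids
attachment_ids = [[7, 'avatar'], [9, 'personal_file']] # from find_unsynced_attachments_ids

# Naive round-robin merge standing in for the worker's `interleave` helper.
interleaved = lfs_object_ids.zip(attachment_ids).flatten(1).compact

p interleaved
# => [[1, :lfs], [7, "avatar"], [2, :lfs], [9, "personal_file"]]
```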
---
title: Geo - Fix difference in FDW / non-FDW queries for Geo::FileRegistry queries
merge_request: 3714
author:
type: fixed
@@ -15,3 +15,4 @@ providers.
- [CAS](../../integration/cas.md) Configure GitLab to sign in using CAS
- [SAML](../../integration/saml.md) Configure GitLab as a SAML 2.0 Service Provider
- [Okta](okta.md) Configure GitLab to sign in using Okta
- [Authentiq](authentiq.md): Enable the Authentiq OmniAuth provider for passwordless authentication
@@ -37,6 +37,7 @@ Follow the steps below to configure an active/active setup:
1. [Configure the database](database.md)
1. [Configure Redis](redis.md)
1. [Configure Redis for GitLab source installations](redis_source.md)
1. [Configure NFS](nfs.md)
1. [Configure the GitLab application servers](gitlab.md)
1. [Configure the load balancers](load_balancer.md)
@@ -19,14 +19,13 @@ Learn how to install, configure, update, and maintain your GitLab instance.
- **(EES/EEP)** [Omnibus support for external MySQL DB](https://docs.gitlab.com/omnibus/settings/database.html#using-a-mysql-database-management-server-enterprise-edition-only): Omnibus package supports configuring an external MySQL database.
- **(EES/EEP)** [Omnibus support for log forwarding](https://docs.gitlab.com/omnibus/settings/logs.html#udp-log-shipping-gitlab-enterprise-edition-only)
- [High Availability](high_availability/README.md): Configure multiple servers for scaling or high availability.
- [High Availability on AWS](../university/high-availability/aws/README.md): Set up GitLab HA on Amazon AWS.
- **(EEP)** [GitLab Geo](../gitlab-geo/README.md): Replicate your GitLab instance to other geographical locations as a read-only, fully operational version.
- **(EEP)** [Pivotal Tile](../install/pivotal/index.md): Deploy GitLab as a pre-configured appliance using Ops Manager (BOSH) for Pivotal Cloud Foundry.
### Configuring GitLab
- [Adjust your instance's timezone](../workflow/timezone.md): Customize the default time zone of GitLab.
- [Header logo](../customization/branded_page_and_email_header.md): Change the logo on all pages and email headers.
- [Welcome message](../customization/welcome_message.md): Add a custom welcome message to the sign-in page.
- [System hooks](../system_hooks/system_hooks.md): Notifications when users, projects and keys are changed.
- [Security](../security/README.md): Learn what you can do to further secure your GitLab instance.
- [Usage statistics, version check, and usage ping](../user/admin_area/settings/usage_statistics.md): Enable or disable the sending of information about your instance to GitLab, Inc.
@@ -37,6 +36,13 @@ Learn how to install, configure, update, and maintain your GitLab instance.
- [Environment variables](environment_variables.md): Supported environment variables that can be used to override their default values in order to configure GitLab.
- **(EES/EEP)** [Elasticsearch](../integration/elasticsearch.md): Enable Elasticsearch to empower GitLab's Advanced Global Search. Useful when you deal with a huge amount of data.
#### Customizing GitLab's appearance
- [Header logo](../customization/branded_page_and_email_header.md): Change the logo on all pages and email headers.
- [Branded login page](../customization/branded_login_page.md): Customize the login page with your own logo, title, and description.
- [Welcome message](../customization/welcome_message.md): Add a custom welcome message to the sign-in page.
- ["New Project" page](../customization/new_project_page.md): Customize the text to be displayed on the page that opens whenever your users create a new project.
### Maintaining GitLab
- [Raketasks](../raketasks/README.md): Perform various tasks for maintenance, backups, automatic webhooks setup, etc.
@@ -85,6 +91,7 @@ server with IMAP authentication on Ubuntu, to be used with Reply by email.
- [Issue closing pattern](issue_closing_pattern.md): Customize how to close an issue from commit messages.
- [Gitaly](gitaly/index.md): Configuring Gitaly, GitLab's Git repository storage service.
- [Default labels](../user/admin_area/labels.md): Create labels that will be automatically added to every new project.
- [Restrict the use of public or internal projects](../public_access/public_access.md#restricting-the-use-of-public-or-internal-projects): Restrict the use of visibility levels for users when they create a project or a snippet.
### Repository settings
---
comments: false
---
This document was split into:
- [administration/issue_closing_pattern.md](../administration/issue_closing_pattern.md).
@@ -8,5 +8,5 @@ It is possible to add a markdown-formatted welcome message to your GitLab
sign-in page. Users of GitLab Enterprise Edition should use the [branded login
page feature](branded_login_page.md) instead.
The welcome message (`extra_sign_in_text`) can now be set/changed in the Admin UI under Admin area > Settings.
# From Community Edition 10.3 to Enterprise Edition 10.3
This guide assumes you have a correctly configured and tested installation of
GitLab Community Edition 10.3. If you run into any trouble or if you have any
questions, please contact us at [support@gitlab.com].
### 0. Backup
Make a backup just in case something goes wrong:
```bash
cd /home/git/gitlab
sudo -u git -H bundle exec rake gitlab:backup:create RAILS_ENV=production
```
For installations using MySQL, this may require granting "LOCK TABLES"
privileges to the GitLab user on the database.
### 1. Stop server
```bash
sudo service gitlab stop
```
### 2. Get the EE code
```bash
cd /home/git/gitlab
sudo -u git -H git remote add -f ee https://gitlab.com/gitlab-org/gitlab-ee.git
sudo -u git -H git checkout 10-3-stable-ee
```
### 3. Install libs, migrations, etc.
```bash
cd /home/git/gitlab
# MySQL installations (note: the line below states '--without postgres')
sudo -u git -H bundle install --without postgres development test --deployment
# PostgreSQL installations (note: the line below states '--without mysql')
sudo -u git -H bundle install --without mysql development test --deployment
# Run database migrations
sudo -u git -H bundle exec rake db:migrate RAILS_ENV=production
# Clean up assets and cache
sudo -u git -H bundle exec rake assets:clean assets:precompile cache:clear RAILS_ENV=production
```
### 4. Start application
```bash
sudo service gitlab start
sudo service nginx restart
```
### 5. Check application status
Check if GitLab and its environment are configured correctly:
```bash
sudo -u git -H bundle exec rake gitlab:env:info RAILS_ENV=production
```
To make sure you didn't miss anything run a more thorough check with:
```bash
sudo -u git -H bundle exec rake gitlab:check RAILS_ENV=production
```
If all items are green, then congratulations, the upgrade is complete!
## Things went south? Revert to previous version (Community Edition 10.3)
### 1. Revert the code to the previous version
```bash
cd /home/git/gitlab
sudo -u git -H git checkout 10-3-stable
```
### 2. Restore from the backup
```bash
cd /home/git/gitlab
sudo -u git -H bundle exec rake gitlab:backup:restore RAILS_ENV=production
```
[support@gitlab.com]: mailto:support@gitlab.com
module Geo
class AttachmentRegistryFinder < RegistryFinder
class AttachmentRegistryFinder < FileRegistryFinder
def attachments
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
end
def count_attachments
uploads.count
attachments.count
end
def count_synced_attachments
@@ -34,12 +42,25 @@ module Geo
relation
end
def uploads
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
# Find a limited number of non-replicated attachments.
#
# You can pass a list with `except_registry_ids:` to exclude items you have
# already scheduled but have not yet finished and persisted to the database.
#
# TODO: An alternative is to use some sort of window function with a cursor
# instead of simply limiting the query and passing a list of items we don't want.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be excluded from the query
def find_unsynced_attachments(batch_size:, except_registry_ids: [])
relation =
if use_legacy_queries?
legacy_find_unsynced_attachments(except_registry_ids: except_registry_ids)
else
fdw_find_unsynced_attachments(except_registry_ids: except_registry_ids)
end
relation.limit(batch_size)
end
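As a hedged usage sketch (mirroring how the download dispatch worker drives this finder, with placeholder IDs and assuming a Rails console on a Geo secondary), `find_unsynced_attachments` is typically called with a batch size and the IDs that are already scheduled in memory:

```ruby
# Hypothetical call site; the IDs in except_registry_ids are placeholders.
finder = Geo::AttachmentRegistryFinder.new(current_node: Gitlab::Geo.current_node)

# Up to 1,000 uploads with no Geo::FileRegistry row, skipping IDs 1, 2 and 3,
# which are assumed to be scheduled but not yet persisted to the registry.
finder.find_unsynced_attachments(batch_size: 1_000, except_registry_ids: [1, 2, 3])
      .pluck(:id, :uploader)
```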
private
@@ -85,29 +106,45 @@ module Geo
.merge(Geo::FileRegistry.attachments)
end
def fdw_find_unsynced_attachments(except_registry_ids:)
fdw_table = Geo::Fdw::Upload.table_name
upload_types = Geo::FileService::DEFAULT_OBJECT_TYPES.map { |val| "'#{val}'" }.join(',')
Geo::Fdw::Upload.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type IN (#{upload_types})")
.where(file_registry: { id: nil })
.where.not(id: except_registry_ids)
end
#
# Legacy accessors (non FDW)
#
def legacy_find_synced_attachments
legacy_find_attachments(Geo::FileRegistry.attachments.synced.pluck(:file_id))
legacy_inner_join_registry_ids(
attachments,
Geo::FileRegistry.attachments.synced.pluck(:file_id),
Upload
)
end
def legacy_find_failed_attachments
legacy_find_attachments(Geo::FileRegistry.attachments.failed.pluck(:file_id))
legacy_inner_join_registry_ids(
attachments,
Geo::FileRegistry.attachments.failed.pluck(:file_id),
Upload
)
end
def legacy_find_attachments(registry_file_ids)
return Upload.none if registry_file_ids.empty?
joined_relation = uploads.joins(<<~SQL)
INNER JOIN
(VALUES #{registry_file_ids.map { |id| "(#{id})" }.join(',')})
file_registry(file_id)
ON #{Upload.table_name}.id = file_registry.file_id
SQL
def legacy_find_unsynced_attachments(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: Geo::FileService::DEFAULT_OBJECT_TYPES, except_registry_ids: except_registry_ids)
joined_relation
legacy_left_outer_join_registry_ids(
attachments,
registry_ids,
Upload
)
end
end
end
module Geo
class FileRegistryFinder < RegistryFinder
def find_failed_objects(batch_size:)
Geo::FileRegistry
.failed
.retry_due
.limit(batch_size)
.pluck(:file_id, :file_type)
end
# Find a limited number of non-replicated LFS objects.
#
# You can pass a list with `except_registry_ids:` to exclude items you have
# already scheduled but have not yet finished and persisted to the database.
#
# TODO: An alternative is to use some sort of window function with a cursor
# instead of simply limiting the query and passing a list of items we don't want.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be excluded from the query
def find_nonreplicated_lfs_objects(batch_size:, except_registry_ids:)
# Selective project replication adds a wrinkle to FDW queries, so
# we fall back to the legacy version for now.
relation =
if use_legacy_queries?
legacy_find_nonreplicated_lfs_objects(except_registry_ids: except_registry_ids)
else
fdw_find_nonreplicated_lfs_objects
end
relation
.limit(batch_size)
.pluck(:id)
.map { |id| [id, :lfs] }
end
# Find a limited number of non-replicated uploads.
#
# You can pass a list with `except_registry_ids:` to exclude items you have
# already scheduled but have not yet finished and persisted to the database.
#
# TODO: An alternative is to use some sort of window function with a cursor
# instead of simply limiting the query and passing a list of items we don't want.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be excluded from the query
def find_nonreplicated_uploads(batch_size:, except_registry_ids:)
# Selective project replication adds a wrinkle to FDW queries, so
# we fall back to the legacy version for now.
relation =
if use_legacy_queries?
legacy_find_nonreplicated_uploads(except_registry_ids: except_registry_ids)
else
fdw_find_nonreplicated_uploads
end
relation
.limit(batch_size)
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
def find_failed_file_registries(batch_size:)
Geo::FileRegistry.failed.retry_due.limit(batch_size)
end
protected
#
# FDW accessors
#
def fdw_find_nonreplicated_lfs_objects
fdw_table = Geo::Fdw::LfsObject.table_name
# Filter out objects in object storage (this is done in GeoNode#lfs_objects)
Geo::Fdw::LfsObject.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type = 'lfs'")
.where("#{fdw_table}.file_store IS NULL OR #{fdw_table}.file_store = #{LfsObjectUploader::LOCAL_STORE}")
.where('file_registry.file_id IS NULL')
end
def fdw_find_nonreplicated_uploads
fdw_table = Geo::Fdw::Upload.table_name
upload_types = Geo::FileService::DEFAULT_OBJECT_TYPES.map { |val| "'#{val}'" }.join(',')
Geo::Fdw::Upload.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type IN (#{upload_types})")
.where('file_registry.file_id IS NULL')
end
#
# Legacy accessors (non FDW)
#
def legacy_find_nonreplicated_lfs_objects(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: :lfs, except_registry_ids: except_registry_ids)
legacy_filter_registry_ids(
lfs_objects_finder.lfs_objects,
registry_ids,
LfsObject.table_name
)
end
def legacy_find_nonreplicated_uploads(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: Geo::FileService::DEFAULT_OBJECT_TYPES, except_registry_ids: except_registry_ids)
legacy_filter_registry_ids(
attachments_finder.uploads,
registry_ids,
Upload.table_name
)
end
# This query requires data from two different databases, and unavoidably
# plucks a list of file IDs from one into the other. This will not scale
# well with the number of synchronized files--the query will increase
# linearly in size--so this should be replaced with postgres_fdw ASAP.
def legacy_filter_registry_ids(objects, registry_ids, table_name)
return objects if registry_ids.empty?
joined_relation = objects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{registry_ids.map { |id| "(#{id}, 't')" }.join(',')})
file_registry(file_id, registry_present)
ON #{table_name}.id = file_registry.file_id
SQL
joined_relation.where(file_registry: { registry_present: [nil, false] })
end
def legacy_pluck_registry_ids(file_types:, except_registry_ids:)
ids = Geo::FileRegistry.where(file_type: file_types).pluck(:file_id)
(ids + except_registry_ids).uniq
end
def attachments_finder
@attachments_finder ||= AttachmentRegistryFinder.new(current_node: current_node)
end
def lfs_objects_finder
@lfs_objects_finder ||= LfsObjectRegistryFinder.new(current_node: current_node)
end
end
end
module Geo
class LfsObjectRegistryFinder < RegistryFinder
class LfsObjectRegistryFinder < FileRegistryFinder
def count_lfs_objects
lfs_objects.count
end
@@ -26,6 +26,27 @@ module Geo
relation.count
end
# Find a limited number of non-replicated LFS objects.
#
# You can pass a list with `except_registry_ids:` to exclude items you have
# already scheduled but have not yet finished and persisted to the database.
#
# TODO: An alternative is to use some sort of window function with a cursor
# instead of simply limiting the query and passing a list of items we don't want.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be excluded from the query
def find_unsynced_lfs_objects(batch_size:, except_registry_ids: [])
relation =
if use_legacy_queries?
legacy_find_unsynced_lfs_objects(except_registry_ids: except_registry_ids)
else
fdw_find_unsynced_lfs_objects(except_registry_ids: except_registry_ids)
end
relation.limit(batch_size)
end
def lfs_objects
relation =
if selective_sync?
@@ -47,29 +68,50 @@ module Geo
Geo::FileRegistry.lfs_objects.failed
end
#
# FDW accessors
#
def fdw_find_unsynced_lfs_objects(except_registry_ids:)
fdw_table = Geo::Fdw::LfsObject.table_name
# Filter out objects in object storage (this is done in GeoNode#lfs_objects)
Geo::Fdw::LfsObject.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type = 'lfs'")
.merge(Geo::Fdw::LfsObject.with_files_stored_locally)
.where(file_registry: { id: nil })
.where.not(id: except_registry_ids)
end
#
# Legacy accessors (non FDW)
#
def legacy_find_synced_lfs_objects
legacy_find_lfs_objects(find_synced_lfs_objects_registries.pluck(:file_id))
legacy_inner_join_registry_ids(
lfs_objects,
find_synced_lfs_objects_registries.pluck(:file_id),
LfsObject
)
end
def legacy_find_failed_lfs_objects
legacy_find_lfs_objects(find_failed_lfs_objects_registries.pluck(:file_id))
legacy_inner_join_registry_ids(
lfs_objects,
find_failed_lfs_objects_registries.pluck(:file_id),
LfsObject
)
end
def legacy_find_lfs_objects(registry_file_ids)
return LfsObject.none if registry_file_ids.empty?
lfs_objects = LfsObject.joins(:projects)
.where(projects: { id: current_node.projects })
.with_files_stored_locally
joined_relation = lfs_objects.joins(<<~SQL)
INNER JOIN
(VALUES #{registry_file_ids.map { |id| "(#{id})" }.join(',')})
file_registry(file_id)
ON #{LfsObject.table_name}.id = file_registry.file_id
SQL
def legacy_find_unsynced_lfs_objects(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: :lfs, except_registry_ids: except_registry_ids)
joined_relation
legacy_left_outer_join_registry_ids(
lfs_objects,
registry_ids,
LfsObject
)
end
end
end
@@ -7,7 +7,7 @@ module Geo
def count_synced_project_registries
relation =
if selective_sync?
legacy_find_synced_project_registries
legacy_find_synced_projects
else
find_synced_project_registries
end
@@ -22,7 +22,7 @@ module Geo
def find_failed_project_registries(type = nil)
relation =
if selective_sync?
legacy_find_filtered_failed_project_registries(type)
legacy_find_filtered_failed_projects(type)
else
find_filtered_failed_project_registries(type)
end
@@ -80,7 +80,7 @@ module Geo
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
def fdw_find_unsynced_projects
Geo::Fdw::Project.joins("LEFT OUTER JOIN project_registry ON project_registry.project_id = #{fdw_table}.id")
.where('project_registry.project_id IS NULL')
.where(project_registry: { project_id: nil })
end
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
@@ -96,61 +96,39 @@ module Geo
# @return [ActiveRecord::Relation<Project>] list of unsynced projects
def legacy_find_unsynced_projects
registry_project_ids = Geo::ProjectRegistry.pluck(:project_id)
return current_node.projects if registry_project_ids.empty?
joined_relation = current_node.projects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{registry_project_ids.map { |id| "(#{id}, 't')" }.join(',')})
project_registry(project_id, registry_present)
ON projects.id = project_registry.project_id
SQL
joined_relation.where(project_registry: { registry_present: [nil, false] })
legacy_left_outer_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.pluck(:project_id),
Project
)
end
# @return [ActiveRecord::Relation<Project>] list of projects updated recently
def legacy_find_projects_updated_recently
legacy_find_projects(Geo::ProjectRegistry.dirty.retry_due.pluck(:project_id))
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of synced projects
def legacy_find_synced_project_registries
legacy_find_project_registries(Geo::ProjectRegistry.synced)
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of projects that sync has failed
def legacy_find_filtered_failed_project_registries(type = nil)
project_registries = find_filtered_failed_project_registries(type)
legacy_find_project_registries(project_registries)
end
# @return [ActiveRecord::Relation<Project>]
def legacy_find_projects(registry_project_ids)
return Project.none if registry_project_ids.empty?
joined_relation = current_node.projects.joins(<<~SQL)
INNER JOIN
(VALUES #{registry_project_ids.map { |id| "(#{id})" }.join(',')})
project_registry(project_id)
ON #{Project.table_name}.id = project_registry.project_id
SQL
joined_relation
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>]
def legacy_find_project_registries(project_registries)
return Geo::ProjectRegistry.none if project_registries.empty?
joined_relation = project_registries.joins(<<~SQL)
INNER JOIN
(VALUES #{current_node.projects.pluck(:id).map { |id| "(#{id})" }.join(',')})
projects(id)
ON #{Geo::ProjectRegistry.table_name}.project_id = projects.id
SQL
joined_relation
legacy_inner_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.dirty.retry_due.pluck(:project_id),
Project
)
end
# @return [ActiveRecord::Relation<Project>] list of synced projects
def legacy_find_synced_projects
legacy_inner_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.synced.pluck(:project_id),
Project
)
end
# @return [ActiveRecord::Relation<Project>] list of projects that sync has failed
def legacy_find_filtered_failed_projects(type = nil)
legacy_inner_join_registry_ids(
find_filtered_failed_project_registries(type),
current_node.projects.pluck(:id),
Geo::ProjectRegistry,
foreign_key: :project_id
)
end
end
end
@@ -15,5 +15,31 @@ module Geo
# queries, so we fall back to the legacy version for now.
!Gitlab::Geo.fdw? || selective_sync?
end
def legacy_inner_join_registry_ids(objects, registry_ids, klass, foreign_key: :id)
return klass.none if registry_ids.empty?
joined_relation = objects.joins(<<~SQL)
INNER JOIN
(VALUES #{registry_ids.map { |id| "(#{id})" }.join(',')})
registry(id)
ON #{klass.table_name}.#{foreign_key} = registry.id
SQL
joined_relation
end
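To make the interpolation above concrete, here is a small standalone sketch (plain Ruby, placeholder IDs and table name, default `:id` foreign key) of the SQL fragment this helper builds for `registry_ids = [1, 2, 3]` against a `projects` table:

```ruby
# Standalone illustration of the VALUES-list join built above; not GitLab code.
registry_ids = [1, 2, 3]
table_name   = 'projects'

join_sql = <<~SQL
  INNER JOIN
  (VALUES #{registry_ids.map { |id| "(#{id})" }.join(',')})
  registry(id)
  ON #{table_name}.id = registry.id
SQL

puts join_sql
# INNER JOIN
# (VALUES (1),(2),(3))
# registry(id)
# ON projects.id = registry.id
```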
def legacy_left_outer_join_registry_ids(objects, registry_ids, klass)
return objects if registry_ids.empty?
joined_relation = objects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{registry_ids.map { |id| "(#{id}, 't')" }.join(',')})
registry(id, registry_present)
ON #{klass.table_name}.id = registry.id
SQL
joined_relation.where(registry: { registry_present: [nil, false] })
end
end
end
@@ -11,10 +11,10 @@ describe Geo::AttachmentRegistryFinder, :geo do
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, group: unsynced_group) }
let(:upload_1) { create(:upload, model: synced_group) }
let(:upload_2) { create(:upload, model: unsynced_group) }
let(:upload_3) { create(:upload, :issuable_upload, model: synced_project) }
let(:upload_4) { create(:upload, model: unsynced_project) }
let!(:upload_1) { create(:upload, model: synced_group) }
let!(:upload_2) { create(:upload, model: unsynced_group) }
let!(:upload_3) { create(:upload, :issuable_upload, model: synced_project) }
let!(:upload_4) { create(:upload, model: unsynced_project) }
let(:upload_5) { create(:upload, model: synced_project) }
let(:upload_6) { create(:upload, :personal_snippet) }
let(:upload_7) { create(:upload, model: synced_subgroup) }
@@ -112,6 +112,30 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
end
end
describe '#find_unsynced_attachments' do
it 'delegates to #fdw_find_unsynced_attachments' do
expect(subject).to receive(:fdw_find_unsynced_attachments).and_call_original
subject.find_unsynced_attachments(batch_size: 10)
end
it 'returns uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads.map(&:id)).to match_array([upload_2.id, upload_3.id, upload_4.id])
end
it 'excludes uploads in the except_registry_ids list' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10, except_registry_ids: [upload_2.id])
expect(uploads.map(&:id)).to match_array([upload_3.id, upload_4.id])
end
end
end
context 'Legacy' do
@@ -198,5 +222,29 @@ describe Geo::AttachmentRegistryFinder, :geo do
end
end
end
describe '#find_unsynced_attachments' do
it 'delegates to #legacy_find_unsynced_attachments' do
expect(subject).to receive(:legacy_find_unsynced_attachments).and_call_original
subject.find_unsynced_attachments(batch_size: 10)
end
it 'returns uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads).to match_array([upload_2, upload_3, upload_4])
end
it 'excludes uploads in the except_registry_ids list' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10, except_registry_ids: [upload_2.id])
expect(uploads).to match_array([upload_3, upload_4])
end
end
end
end
require 'spec_helper'
describe Geo::FileRegistryFinder, :geo do
include ::EE::GeoHelpers
let(:secondary) { create(:geo_node) }
subject { described_class.new(current_node: secondary) }
before do
stub_current_geo_node(secondary)
end
describe '#find_failed_file_registries' do
it 'returns file registries that failed to sync' do
failed_lfs_registry = create(:geo_file_registry, :lfs, :with_file, success: false)
failed_file_upload = create(:geo_file_registry, :with_file, success: false)
failed_issuable_upload = create(:geo_file_registry, :with_file, success: false)
create(:geo_file_registry, :lfs, :with_file, success: true)
create(:geo_file_registry, :with_file, success: true)
uploads = subject.find_failed_file_registries(batch_size: 10)
expect(uploads).to match_array([failed_lfs_registry, failed_file_upload, failed_issuable_upload])
end
end
end
@@ -7,9 +7,11 @@ describe Geo::LfsObjectRegistryFinder, :geo do
let(:synced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project) }
let(:lfs_object_1) { create(:lfs_object) }
let(:lfs_object_2) { create(:lfs_object) }
let(:lfs_object_3) { create(:lfs_object) }
let!(:lfs_object_1) { create(:lfs_object) }
let!(:lfs_object_2) { create(:lfs_object) }
let!(:lfs_object_3) { create(:lfs_object) }
let!(:lfs_object_4) { create(:lfs_object) }
subject { described_class.new(current_node: secondary) }
@@ -100,4 +102,70 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
end
end
# Disable transactions via the :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo.fdw?
end
describe '#find_unsynced_lfs_objects' do
it 'delegates to #fdw_find_unsynced_lfs_objects' do
expect(subject).to receive(:fdw_find_unsynced_lfs_objects).and_call_original
subject.find_unsynced_lfs_objects(batch_size: 10)
end
it 'returns LFS objects without an entry on the tracking database' do
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10)
expect(lfs_objects.map(&:id)).to match_array([lfs_object_2.id, lfs_object_4.id])
end
it 'excludes LFS objects in the except_registry_ids list' do
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10, except_registry_ids: [lfs_object_2.id])
expect(lfs_objects.map(&:id)).to match_array([lfs_object_4.id])
end
end
end
context 'Legacy' do
before do
allow(Gitlab::Geo).to receive(:fdw?).and_return(false)
end
describe '#find_unsynced_lfs_objects' do
it 'delegates to #legacy_find_unsynced_lfs_objects' do
expect(subject).to receive(:legacy_find_unsynced_lfs_objects).and_call_original
subject.find_unsynced_lfs_objects(batch_size: 10)
end
it 'returns LFS objects without an entry on the tracking database' do
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10)
expect(lfs_objects).to match_array([lfs_object_2, lfs_object_4])
end
it 'excludes LFS objects in the except_registry_ids list' do
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10, except_registry_ids: [lfs_object_2.id])
expect(lfs_objects).to match_array([lfs_object_4])
end
end
end
end
@@ -37,8 +37,8 @@ describe Geo::ProjectRegistryFinder, :geo do
secondary.update_attribute(:namespaces, [synced_group])
end
it 'delegates to #legacy_find_synced_project_registries' do
expect(subject).to receive(:legacy_find_synced_project_registries).and_call_original
it 'delegates to #legacy_find_synced_projects' do
expect(subject).to receive(:legacy_find_synced_projects).and_call_original
subject.count_synced_project_registries
end
@@ -128,8 +128,8 @@ describe Geo::ProjectRegistryFinder, :geo do
secondary.update_attribute(:namespaces, [synced_group])
end
it 'delegates to #legacy_find_filtered_failed_project_registries' do
expect(subject).to receive(:legacy_find_filtered_failed_project_registries).and_call_original
it 'delegates to #legacy_find_filtered_failed_projects' do
expect(subject).to receive(:legacy_find_filtered_failed_projects).and_call_original
subject.find_failed_project_registries
end