Commit a50afe7c authored by Nick Thomas's avatar Nick Thomas

Merge branch 'ee-hashed-storage-attachments-migration-path' into 'master'

[EE] Hashed Storage Attachments migration path

See merge request gitlab-org/gitlab-ee!3544
parents 79aef9e9 8c1a92bc
......@@ -30,6 +30,10 @@ module Geo
class_name: 'Geo::LfsObjectDeletedEvent',
foreign_key: :lfs_object_deleted_event_id
belongs_to :hashed_storage_attachments_event,
class_name: 'Geo::HashedStorageAttachmentsEvent',
foreign_key: :hashed_storage_attachments_event_id
def self.latest_event
order(id: :desc).first
end
......@@ -41,7 +45,8 @@ module Geo
repository_renamed_event ||
repositories_changed_event ||
hashed_storage_migrated_event ||
lfs_object_deleted_event
lfs_object_deleted_event ||
hashed_storage_attachments_event
end
def project_id
......
module Geo
class HashedStorageAttachmentsEvent < ActiveRecord::Base
include Geo::Model
belongs_to :project
validates :project, :old_attachments_path, :new_attachments_path, presence: true
end
end
......@@ -277,8 +277,9 @@ class Project < ActiveRecord::Base
scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) }
scope :with_hashed_storage, -> { where('storage_version >= 1') }
scope :with_legacy_storage, -> { where(storage_version: [nil, 0]) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) }
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) }
scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) }
scope :sorted_by_stars, -> { reorder('projects.star_count DESC') }
......
......@@ -4,7 +4,6 @@ module Storage
delegate :gitlab_shell, :repository_storage_path, to: :project
ROOT_PATH_PREFIX = '@hashed'.freeze
STORAGE_VERSION = 1
def initialize(project)
@project = project
......
module Geo
class HashedStorageAttachmentsEventStore < EventStore
self.event_type = :hashed_storage_attachments_event
private
def build_event
Geo::HashedStorageAttachmentsEvent.new(
project: project,
old_attachments_path: old_attachments_path,
new_attachments_path: new_attachments_path
)
end
def old_attachments_path
params.fetch(:old_attachments_path)
end
def new_attachments_path
params.fetch(:new_attachments_path)
end
end
end
module Geo
AttachmentMigrationError = Class.new(StandardError)
class HashedStorageAttachmentsMigrationService
include ::Gitlab::Geo::LogHelpers
attr_reader :project_id, :old_attachments_path, :new_attachments_path
def initialize(project_id, old_attachments_path:, new_attachments_path:)
@project_id = project_id
@old_attachments_path = old_attachments_path
@new_attachments_path = new_attachments_path
end
def async_execute
Geo::HashedStorageAttachmentsMigrationWorker.perform_async(
project_id,
old_attachments_path,
new_attachments_path
)
end
def execute
origin = File.join(CarrierWave.root, FileUploader.base_dir, old_attachments_path)
target = File.join(CarrierWave.root, FileUploader.base_dir, new_attachments_path)
move_folder!(origin, target)
end
private
def project
@project ||= Project.find(project_id)
end
def move_folder!(old_path, new_path)
unless File.directory?(old_path)
log_info("Skipped attachments migration to Hashed Storage, source path doesn't exist or is not a directory", project_id: project.id, source: old_path, target: new_path)
return
end
if File.exist?(new_path)
log_error("Cannot migrate attachments to Hashed Storage, target path already exist", project_id: project.id, source: old_path, target: new_path)
raise AttachmentMigrationError, "Target path '#{new_path}' already exist"
end
# Create hashed storage base path folder
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(old_path, new_path)
log_info("Migrated project attachments to Hashed Storage", project_id: project.id, source: old_path, target: new_path)
true
end
end
end
module Geo
class HashedStorageMigrationService
include ::Gitlab::Geo::LogHelpers
attr_reader :project_id, :old_disk_path, :new_disk_path, :old_storage_version
def initialize(project_id, old_disk_path:, new_disk_path:, old_storage_version:)
......@@ -22,9 +24,12 @@ module Geo
project.expire_caches_before_rename(old_disk_path)
if migrating_from_legacy_storage? && !move_repository
log_error("Repository could not be migrated to Hashed Storage", project_id: project.id, source: old_disk_path, target: new_disk_path)
raise RepositoryCannotBeRenamed, "Repository #{old_disk_path} could not be renamed to #{new_disk_path}"
end
log_info("Repository migrated to Hashed Storage", project_id: project.id, source: old_disk_path, target: new_disk_path)
true
end
......
module Projects
module HashedStorage
AttachmentMigrationError = Class.new(StandardError)
class MigrateAttachmentsService < BaseService
attr_reader :logger, :old_path, :new_path
prepend ::EE::Projects::HashedStorage::MigrateAttachmentsService
def initialize(project, logger = nil)
@project = project
@logger = logger || Rails.logger
end
def execute
@old_path = project.full_path
@new_path = project.disk_path
origin = FileUploader.dynamic_path_segment(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:attachments]
target = FileUploader.dynamic_path_segment(project)
result = move_folder!(origin, target)
project.save!
if result && block_given?
yield
end
result
end
private
def move_folder!(old_path, new_path)
unless File.directory?(old_path)
logger.info("Skipped attachments migration from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
return
end
if File.exist?(new_path)
logger.error("Cannot migrate attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentMigrationError, "Target path '#{new_path}' already exist"
end
# Create hashed storage base path folder
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(old_path, new_path)
logger.info("Migrated project attachments from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})")
true
end
end
end
end
module Projects
module HashedStorage
class MigrateRepositoryService < BaseService
include Gitlab::ShellAdapter
prepend ::EE::Projects::HashedStorage::MigrateRepositoryService
attr_reader :old_disk_path, :new_disk_path, :old_wiki_disk_path, :old_storage_version, :logger
def initialize(project, logger = nil)
@project = project
@logger = logger || Rails.logger
end
def execute
@old_disk_path = project.disk_path
has_wiki = project.wiki.repository_exists?
@old_storage_version = project.storage_version
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:repository]
project.ensure_storage_path_exists
@new_disk_path = project.disk_path
result = move_repository(@old_disk_path, @new_disk_path)
if has_wiki
@old_wiki_disk_path = "#{@old_disk_path}.wiki"
result &&= move_repository("#{@old_wiki_disk_path}", "#{@new_disk_path}.wiki")
end
unless result
rollback_folder_move
project.storage_version = nil
end
project.repository_read_only = false
project.save!
if result && block_given?
yield
end
result
end
private
def move_repository(from_name, to_name)
from_exists = gitlab_shell.exists?(project.repository_storage_path, "#{from_name}.git")
to_exists = gitlab_shell.exists?(project.repository_storage_path, "#{to_name}.git")
# If we don't find the repository on either original or target we should log that as it could be an issue if the
# project was not originally empty.
if !from_exists && !to_exists
logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..."
return false
elsif !from_exists
# Repository have been moved already.
return true
end
gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name)
end
def rollback_folder_move
move_repository(@new_disk_path, @old_disk_path)
move_repository("#{@new_disk_path}.wiki", "#{@old_disk_path}.wiki")
end
end
end
end
module Projects
class HashedStorageMigrationService < BaseService
include Gitlab::ShellAdapter
prepend ::EE::Projects::HashedStorageMigrationService
attr_reader :old_disk_path, :new_disk_path, :old_wiki_disk_path, :old_storage_version
attr_reader :logger
def initialize(project, logger = nil)
@project = project
@logger ||= Rails.logger
@logger = logger || Rails.logger
end
def execute
return if project.hashed_storage?(:repository)
@old_disk_path = project.disk_path
has_wiki = project.wiki.repository_exists?
@old_storage_version = project.storage_version
project.storage_version = Storage::HashedProject::STORAGE_VERSION
project.ensure_storage_path_exists
@new_disk_path = project.disk_path
result = move_repository(@old_disk_path, @new_disk_path)
if has_wiki
@old_wiki_disk_path = "#{@old_disk_path}.wiki"
result &&= move_repository(@old_wiki_disk_path, "#{@new_disk_path}.wiki")
# Migrate repository from Legacy to Hashed Storage
unless project.hashed_storage?(:repository)
return unless HashedStorage::MigrateRepositoryService.new(project, logger).execute
end
unless result
rollback_folder_move
return
# Migrate attachments from Legacy to Hashed Storage
unless project.hashed_storage?(:attachments)
HashedStorage::MigrateAttachmentsService.new(project, logger).execute
end
project.repository_read_only = false
project.save!
block_given? ? yield : result
end
private
def move_repository(from_name, to_name)
from_exists = gitlab_shell.exists?(project.repository_storage_path, "#{from_name}.git")
to_exists = gitlab_shell.exists?(project.repository_storage_path, "#{to_name}.git")
# If we don't find the repository on either original or target we should log that as it could be an issue if the
# project was not originally empty.
if !from_exists && !to_exists
logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..."
return false
elsif !from_exists
# Repository have been moved already.
return true
end
gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name)
end
def rollback_folder_move
move_repository(@new_disk_path, @old_disk_path)
move_repository("#{@new_disk_path}.wiki", "#{@old_disk_path}.wiki")
end
def logger
@logger
end
end
end
......@@ -31,12 +31,19 @@ class FileUploader < GitlabUploader
# Returns a String without a trailing slash
def self.dynamic_path_segment(project)
if project.hashed_storage?(:attachments)
File.join(CarrierWave.root, base_dir, project.disk_path)
dynamic_path_builder(project.disk_path)
else
File.join(CarrierWave.root, base_dir, project.full_path)
dynamic_path_builder(project.full_path)
end
end
# Auxiliary method to build dynamic path segment when not using a project model
#
# Prefer to use the `.dynamic_path_segment` as it includes Hashed Storage specific logic
def self.dynamic_path_builder(path)
File.join(CarrierWave.root, base_dir, path)
end
attr_accessor :model
attr_reader :secret
......
module Geo
class HashedStorageAttachmentsMigrationWorker
include Sidekiq::Worker
include GeoQueue
def perform(project_id, old_attachments_path, new_attachments_path)
Geo::HashedStorageAttachmentsMigrationService.new(
project_id,
old_attachments_path: old_attachments_path,
new_attachments_path: new_attachments_path
).execute
end
end
end
......@@ -2,10 +2,34 @@ class ProjectMigrateHashedStorageWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
LEASE_TIMEOUT = 30.seconds.to_i
def perform(project_id)
project = Project.find_by(id: project_id)
return if project.nil? || project.pending_delete?
::Projects::HashedStorageMigrationService.new(project, logger).execute
uuid = lease_for(project_id).try_obtain
if uuid
::Projects::HashedStorageMigrationService.new(project, logger).execute
else
false
end
rescue => ex
cancel_lease_for(project_id, uuid) if uuid
raise ex
end
def lease_for(project_id)
Gitlab::ExclusiveLease.new(lease_key(project_id), timeout: LEASE_TIMEOUT)
end
private
def lease_key(project_id)
"project_migrate_hashed_storage_worker:#{project_id}"
end
def cancel_lease_for(project_id, uuid)
Gitlab::ExclusiveLease.cancel(lease_key(project_id), uuid)
end
end
---
title: 'Geo: replicate Attachments migration to Hashed Storage in secondary node'
merge_request: 3544
author:
type: added
---
title: Hashed Storage migration script now supports migrating project attachments
merge_request: 15352
author:
type: added
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddAttachmentsMigrationToGeoMigrationEvents < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
create_table :geo_hashed_storage_attachments_events, id: :bigserial do |t|
t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false
t.text :old_attachments_path, null: false
t.text :new_attachments_path, null: false
end
add_column :geo_event_log, :hashed_storage_attachments_event_id, :integer, limit: 8
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171121144800) do
ActiveRecord::Schema.define(version: 20171124070437) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -882,6 +882,7 @@ ActiveRecord::Schema.define(version: 20171121144800) do
t.integer "repository_created_event_id", limit: 8
t.integer "hashed_storage_migrated_event_id", limit: 8
t.integer "lfs_object_deleted_event_id", limit: 8
t.integer "hashed_storage_attachments_event_id", limit: 8
end
add_index "geo_event_log", ["repositories_changed_event_id"], name: "index_geo_event_log_on_repositories_changed_event_id", using: :btree
......@@ -890,6 +891,14 @@ ActiveRecord::Schema.define(version: 20171121144800) do
add_index "geo_event_log", ["repository_renamed_event_id"], name: "index_geo_event_log_on_repository_renamed_event_id", using: :btree
add_index "geo_event_log", ["repository_updated_event_id"], name: "index_geo_event_log_on_repository_updated_event_id", using: :btree
create_table "geo_hashed_storage_attachments_events", id: :bigserial, force: :cascade do |t|
t.integer "project_id", null: false
t.text "old_attachments_path", null: false
t.text "new_attachments_path", null: false
end
add_index "geo_hashed_storage_attachments_events", ["project_id"], name: "index_geo_hashed_storage_attachments_events_on_project_id", using: :btree
create_table "geo_hashed_storage_migrated_events", id: :bigserial, force: :cascade do |t|
t.integer "project_id", null: false
t.text "repository_storage_name", null: false
......@@ -2450,6 +2459,7 @@ ActiveRecord::Schema.define(version: 20171121144800) do
add_foreign_key "geo_event_log", "geo_repository_deleted_events", column: "repository_deleted_event_id", name: "fk_c4b1c1f66e", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repository_renamed_events", column: "repository_renamed_event_id", name: "fk_86c84214ec", on_delete: :cascade
add_foreign_key "geo_event_log", "geo_repository_updated_events", column: "repository_updated_event_id", on_delete: :cascade
add_foreign_key "geo_hashed_storage_attachments_events", "projects", on_delete: :cascade
add_foreign_key "geo_hashed_storage_migrated_events", "projects", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "namespaces", on_delete: :cascade
......
# Repository Storage Rake Tasks
This is a collection of rake tasks you can use to help you list and migrate
existing projects from Legacy storage to the new Hashed storage type.
existing projects and attachments associated with it from Legacy storage to
the new Hashed storage type.
You can read more about the storage types [here][storage-types].
## Migrate existing projects to Hashed storage
Before migrating your existing projects, you should
[enable hashed storage][storage-migration] for the new projects as well.
This task will schedule all your existing projects and attachments associated with it to be migrated to the
**Hashed** storage type:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:migrate_to_hashed
```
**Source Installation**
```bash
rake gitlab:storage:migrate_to_hashed
```
You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen.
There is a specific Queue you can watch to see how long it will take to finish: **project_migrate_hashed_storage**
After it reaches zero, you can confirm every project has been migrated by running the commands bellow.
If you find it necessary, you can run this migration script again to schedule missing projects.
Any error or warning will be logged in the sidekiq's log file.
You only need the `gitlab:storage:migrate_to_hashed` rake task to migrate your repositories, but we have additional
commands below that helps you inspect projects and attachments in both legacy and hashed storage.
## List projects on Legacy storage
To have a simple summary of projects using **Legacy** storage:
......@@ -73,35 +106,73 @@ rake gitlab:storage:list_hashed_projects
```
## Migrate existing projects to Hashed storage
## List attachments on Legacy storage
Before migrating your existing projects, you should
[enable hashed storage][storage-migration] for the new projects as well.
To have a simple summary of project attachments using **Legacy** storage:
This task will schedule all your existing projects to be migrated to the
**Hashed** storage type:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:legacy_attachments
```
**Source Installation**
```bash
rake gitlab:storage:legacy_attachments
```
------
To list project attachments using **Legacy** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:migrate_to_hashed
gitlab-rake gitlab:storage:list_legacy_attachments
```
**Source Installation**
```bash
rake gitlab:storage:migrate_to_hashed
rake gitlab:storage:list_legacy_attachments
```
You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen.
There is a specific Queue you can watch to see how long it will take to finish: **project_migrate_hashed_storage**
## List attachments on Hashed storage
After it reaches zero, you can confirm every project has been migrated by running the commands above.
If you find it necessary, you can run this migration script again to schedule missing projects.
To have a simple summary of project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:hashed_attachments
```
Any error or warning will be logged in the sidekiq log file.
**Source Installation**
```bash
rake gitlab:storage:hashed_attachments
```
------
To list project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:list_hashed_attachments
```
**Source Installation**
```bash
rake gitlab:storage:list_hashed_attachments
```
[storage-types]: ../repository_storage_types.md
[storage-migration]: ../repository_storage_types.md#how-to-migrate-to-hashed-storage
module EE
module Projects
module HashedStorage
module MigrateAttachmentsService
def execute
raise NotImplementedError.new unless defined?(super)
super do
::Geo::HashedStorageAttachmentsEventStore.new(
project,
old_attachments_path: old_path,
new_attachments_path: new_path
).create
end
end
end
end
end
end
module EE
module Projects
module HashedStorage
module MigrateRepositoryService
def execute
raise NotImplementedError.new unless defined?(super)
super do
::Geo::HashedStorageMigratedEventStore.new(
project,
old_storage_version: old_storage_version,
old_disk_path: old_disk_path,
old_wiki_disk_path: old_wiki_disk_path
).create
end
end
end
end
end
end
module EE
module Projects
module HashedStorageMigrationService
def execute
raise NotImplementedError.new unless defined?(super)
super do
::Geo::HashedStorageMigratedEventStore.new(
project,
old_storage_version: old_storage_version,
old_disk_path: old_disk_path,
old_wiki_disk_path: old_wiki_disk_path
).create
end
end
end
end
end
......@@ -183,6 +183,23 @@ module Gitlab
job_id: job_id)
end
def handle_hashed_storage_attachments_event(event, created_at)
job_id = ::Geo::HashedStorageAttachmentsMigrationService.new(
event.project_id,
old_attachments_path: event.old_attachments_path,
new_attachments_path: event.new_attachments_path
).async_execute
logger.event_info(
created_at,
message: 'Migrating attachments to hashed storage',
project_id: event.project_id,
old_attachments_path: event.old_attachments_path,
new_attachments_path: event.new_attachments_path,
job_id: job_id
)
end
def handle_lfs_object_deleted_event(event, created_at)
file_path = File.join(LfsObjectUploader.local_store_path, event.file_path)
......
......@@ -2,10 +2,10 @@ namespace :gitlab do
namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do
legacy_projects_count = Project.with_legacy_storage.count
legacy_projects_count = Project.with_unmigrated_storage.count
if legacy_projects_count == 0
puts 'There are no projects using legacy storage. Nothing to do!'
puts 'There are no projects requiring storage migration. Nothing to do!'
next
end
......@@ -23,22 +23,42 @@ namespace :gitlab do
desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage'
task legacy_projects: :environment do
projects_summary(Project.with_legacy_storage)
relation_summary('projects', Project.without_storage_feature(:repository))
end
desc 'Gitlab | Storage | List existing projects using Legacy Storage'
task list_legacy_projects: :environment do
projects_list(Project.with_legacy_storage)
projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository))
end
desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage'
task hashed_projects: :environment do
projects_summary(Project.with_hashed_storage)
relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | List existing projects using Hashed Storage'
task list_hashed_projects: :environment do
projects_list(Project.with_hashed_storage)
projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
task legacy_attachments: :environment do
relation_summary('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Legacy Storage'
task list_legacy_attachments: :environment do
attachments_list('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage'
task hashed_attachments: :environment do
relation_summary('attachments using Hashed Storage', hashed_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Hashed Storage'
task list_hashed_attachments: :environment do
attachments_list('attachments using Hashed Storage', hashed_attachments_relation)
end
def batch_size
......@@ -46,29 +66,43 @@ namespace :gitlab do
end
def project_id_batches(&block)
Project.with_legacy_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
yield ids.min, ids.max
end
end
def projects_summary(relation)
projects_count = relation.count
puts "* Found #{projects_count} projects".color(:green)
def legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def relation_summary(relation_name, relation)
relation_count = relation.count
puts "* Found #{relation_count} #{relation_name}".color(:green)
projects_count
relation_count
end
def projects_list(relation)
projects_count = projects_summary(relation)
def projects_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
projects = relation.with_route
limit = ENV.fetch('LIMIT', 500).to_i
return unless projects_count > 0
return unless relation_count > 0
puts " ! Displaying first #{limit} projects..." if projects_count > limit
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
projects.find_in_batches(batch_size: batch_size) do |batch|
......@@ -81,5 +115,26 @@ namespace :gitlab do
end
end
end
def attachments_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
limit = ENV.fetch('LIMIT', 500).to_i
return unless relation_count > 0
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
relation.find_in_batches(batch_size: batch_size) do |batch|
batch.each do |upload|
counter += 1
puts " - #{upload.path} (id: #{upload.id})".color(:red)
return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator
end
end
end
end
end
require 'spec_helper'
describe Projects::HashedStorage::MigrateAttachmentsService do
let(:project) { create(:project, storage_version: 1) }
let(:service) { described_class.new(project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
let(:old_attachments_path) { legacy_storage.disk_path }
let(:new_attachments_path) { hashed_storage.disk_path }
describe '#execute' do
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
context 'on success' do
before do
FileUtils.mkdir_p(FileUploader.dynamic_path_builder(old_attachments_path))
end
it 'returns true' do
expect(service.execute).to be_truthy
end
it 'creates a Geo::HashedStorageAttachmentsEvent' do
expect { service.execute }.to change(Geo::EventLog, :count).by(1)
event = Geo::EventLog.first.event
expect(event).to be_a(Geo::HashedStorageAttachmentsEvent)
expect(event).to have_attributes(
old_attachments_path: old_attachments_path,
new_attachments_path: new_attachments_path
)
end
end
context 'on failure' do
it 'does not create a Geo event when skipped' do
expect { service.execute }.not_to change { Geo::EventLog.count }
end
it 'does not create a Geo event on failure' do
expect(service).to receive(:move_folder!).and_raise(::Projects::HashedStorage::AttachmentMigrationError)
expect { service.execute }.to raise_error(::Projects::HashedStorage::AttachmentMigrationError)
expect(Geo::EventLog.count).to eq(0)
end
end
end
end
require 'spec_helper'
describe Projects::HashedStorageMigrationService do
describe Projects::HashedStorage::MigrateRepositoryService do
let(:project) { create(:project, :empty_repo, :wiki_repo) }
let(:service) { described_class.new(project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
......@@ -18,7 +18,7 @@ describe Projects::HashedStorageMigrationService do
expect(event).to be_a(Geo::HashedStorageMigratedEvent)
expect(event).to have_attributes(
old_storage_version: nil,
new_storage_version: Storage::HashedProject::STORAGE_VERSION,
new_storage_version: ::Project::HASHED_STORAGE_FEATURES[:repository],
old_disk_path: legacy_storage.disk_path,
new_disk_path: hashed_storage.disk_path,
old_wiki_disk_path: legacy_storage.disk_path + '.wiki',
......
......@@ -20,6 +20,10 @@ FactoryGirl.define do
hashed_storage_migrated_event factory: :geo_hashed_storage_migrated_event
end
trait :hashed_storage_attachments_event do
hashed_storage_attachments_event factory: :geo_hashed_storage_attachments_event
end
trait :lfs_object_deleted_event do
lfs_object_deleted_event factory: :geo_lfs_object_deleted_event
end
......@@ -78,7 +82,14 @@ FactoryGirl.define do
new_disk_path { project.path_with_namespace + '_new' }
old_wiki_disk_path { project.wiki.path_with_namespace }
new_wiki_disk_path { project.wiki.path_with_namespace + '_new' }
new_storage_version { Project::LATEST_STORAGE_VERSION }
new_storage_version { Project::HASHED_STORAGE_FEATURES[:repository] }
end
factory :geo_hashed_storage_attachments_event, class: Geo::HashedStorageAttachmentsEvent do
project { create(:project, :repository) }
old_attachments_path { Storage::LegacyProject.new(project).disk_path }
new_attachments_path { Storage::HashedProject.new(project).disk_path }
end
factory :geo_lfs_object_deleted_event, class: Geo::LfsObjectDeletedEvent do
......
......@@ -253,6 +253,27 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
end
end
context 'when processing an attachment migration event to hashed storage' do
let(:event_log) { create(:geo_event_log, :hashed_storage_attachments_event) }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
let(:hashed_storage_attachments_event) { event_log.hashed_storage_attachments_event }
it 'does not create a new project registry' do
expect { daemon.run_once! }.not_to change(Geo::ProjectRegistry, :count)
end
it 'schedules a Geo::HashedStorageAttachmentsMigrationWorker' do
project = hashed_storage_attachments_event.project
old_attachments_path = hashed_storage_attachments_event.old_attachments_path
new_attachments_path = hashed_storage_attachments_event.new_attachments_path
expect(::Geo::HashedStorageAttachmentsMigrationWorker).to receive(:perform_async)
.with(project.id, old_attachments_path, new_attachments_path)
daemon.run_once!
end
end
context 'when replaying a LFS object deleted event' do
let(:event_log) { create(:geo_event_log, :lfs_object_deleted_event) }
let!(:event_log_state) { create(:geo_event_log_state, event_id: event_log.id - 1) }
......
require 'spec_helper'
describe Geo::HashedStorageAttachmentsEventStore do
let(:project) { create(:project, :hashed, path: 'bar') }
let(:attachments_event) { build(:geo_hashed_storage_attachments_event, project: project) }
set(:secondary_node) { create(:geo_node) }
let(:old_attachments_path) { attachments_event.old_attachments_path }
let(:new_attachments_path) {attachments_event.new_attachments_path }
subject(:event_store) { described_class.new(project, old_storage_version: 1, new_storage_version: 2, old_attachments_path: old_attachments_path, new_attachments_path: new_attachments_path) }
describe '#create' do
it 'does not create an event when not running on a primary node' do
allow(Gitlab::Geo).to receive(:primary?) { false }
expect { event_store.create }.not_to change(Geo::HashedStorageAttachmentsEvent, :count)
end
context 'when running on a primary node' do
before do
allow(Gitlab::Geo).to receive(:primary?) { true }
end
it 'does not create an event when there are no secondary nodes' do
allow(Gitlab::Geo).to receive(:secondary_nodes) { [] }
expect { event_store.create }.not_to change(Geo::HashedStorageAttachmentsEvent, :count)
end
it 'creates a attachment migration event' do
expect { event_store.create }.to change(Geo::HashedStorageAttachmentsEvent, :count).by(1)
end
it 'tracks project attributes' do
event_store.create
event = Geo::HashedStorageAttachmentsEvent.last
expect(event).to have_attributes(
old_attachments_path: old_attachments_path,
new_attachments_path: new_attachments_path
)
end
end
end
end
require 'spec_helper'
describe Geo::HashedStorageAttachmentsMigrationService do
let!(:project) { create(:project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
let!(:upload) { Upload.find_by(path: file_uploader.relative_path) }
let(:file_uploader) { build(:file_uploader, project: project) }
let(:old_path) { File.join(base_path(legacy_storage), upload.path) }
let(:new_path) { File.join(base_path(hashed_storage), upload.path) }
subject(:service) { described_class.new(project.id, old_attachments_path: legacy_storage.disk_path, new_attachments_path: hashed_storage.disk_path) }
describe '#execute' do
context 'when succeeds' do
it 'moves attachments to hashed storage layout' do
expect(File.file?(old_path)).to be_truthy
expect(File.file?(new_path)).to be_falsey
expect(File.exist?(base_path(legacy_storage))).to be_truthy
expect(File.exist?(base_path(hashed_storage))).to be_falsey
expect(FileUtils).to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage)).and_call_original
service.execute
expect(File.exist?(base_path(hashed_storage))).to be_truthy
expect(File.exist?(base_path(legacy_storage))).to be_falsey
expect(File.file?(old_path)).to be_falsey
expect(File.file?(new_path)).to be_truthy
end
end
context 'when original folder does not exist anymore' do
before do
FileUtils.rm_rf(base_path(legacy_storage))
end
it 'skips moving folders and go to next' do
expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
service.execute
expect(File.exist?(base_path(hashed_storage))).to be_falsey
expect(File.file?(new_path)).to be_falsey
end
end
context 'when target folder already exists' do
before do
FileUtils.mkdir_p(base_path(hashed_storage))
end
it 'raises AttachmentMigrationError' do
expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
expect { service.execute }.to raise_error(::Geo::AttachmentMigrationError)
end
end
end
describe '#async_execute' do
it 'starts the worker' do
expect(Geo::HashedStorageAttachmentsMigrationWorker).to receive(:perform_async)
service.async_execute
end
it 'returns job id' do
allow(Geo::HashedStorageAttachmentsMigrationWorker).to receive(:perform_async).and_return('foo')
expect(service.async_execute).to eq('foo')
end
end
def base_path(storage)
File.join(CarrierWave.root, FileUploader.base_dir, storage.disk_path)
end
end
......@@ -10,7 +10,7 @@ describe Geo::HashedStorageMigrationService do
describe '#execute' do
context 'project backed by legacy storage' do
before do
project.update_attribute(:storage_version, Project::LATEST_STORAGE_VERSION)
project.update_attribute(:storage_version, Project::HASHED_STORAGE_FEATURES[:repository])
end
it 'moves the project repositories' do
......
require 'spec_helper'
describe Projects::HashedStorage::MigrateAttachmentsService do
subject(:service) { described_class.new(project) }
let(:project) { create(:project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
let!(:upload) { Upload.find_by(path: file_uploader.relative_path) }
let(:file_uploader) { build(:file_uploader, project: project) }
let(:old_path) { File.join(base_path(legacy_storage), upload.path) }
let(:new_path) { File.join(base_path(hashed_storage), upload.path) }
context '#execute' do
context 'when succeeds' do
it 'moves attachments to hashed storage layout' do
expect(File.file?(old_path)).to be_truthy
expect(File.file?(new_path)).to be_falsey
expect(File.exist?(base_path(legacy_storage))).to be_truthy
expect(File.exist?(base_path(hashed_storage))).to be_falsey
expect(FileUtils).to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage)).and_call_original
service.execute
expect(File.exist?(base_path(hashed_storage))).to be_truthy
expect(File.exist?(base_path(legacy_storage))).to be_falsey
expect(File.file?(old_path)).to be_falsey
expect(File.file?(new_path)).to be_truthy
end
end
context 'when original folder does not exist anymore' do
before do
FileUtils.rm_rf(base_path(legacy_storage))
end
it 'skips moving folders and go to next' do
expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
service.execute
expect(File.exist?(base_path(hashed_storage))).to be_falsey
expect(File.file?(new_path)).to be_falsey
end
end
context 'when target folder already exists' do
before do
FileUtils.mkdir_p(base_path(hashed_storage))
end
it 'raises AttachmentMigrationError' do
expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
expect { service.execute }.to raise_error(Projects::HashedStorage::AttachmentMigrationError)
end
end
end
def base_path(storage)
FileUploader.dynamic_path_builder(storage.disk_path)
end
end
require 'spec_helper'
describe Projects::HashedStorage::MigrateRepositoryService do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:project) { create(:project, :empty_repo, :wiki_repo) }
let(:service) { described_class.new(project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
describe '#execute' do
before do
allow(service).to receive(:gitlab_shell) { gitlab_shell }
end
context 'when succeeds' do
it 'renames project and wiki repositories' do
service.execute
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_truthy
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_truthy
end
it 'updates project to be hashed and not read-only' do
service.execute
expect(project.hashed_storage?(:repository)).to be_truthy
expect(project.repository_read_only).to be_falsey
end
it 'move operation is called for both repositories' do
expect_move_repository(project.disk_path, hashed_storage.disk_path)
expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
service.execute
end
end
context 'when one move fails' do
it 'rollsback repositories to original name' do
from_name = project.disk_path
to_name = hashed_storage.disk_path
allow(service).to receive(:move_repository).and_call_original
allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only
expect(service).to receive(:rollback_folder_move).and_call_original
service.execute
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_falsey
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_falsey
expect(project.repository_read_only?).to be_falsey
end
context 'when rollback fails' do
let(:from_name) { legacy_storage.disk_path }
let(:to_name) { hashed_storage.disk_path }
before do
hashed_storage.ensure_storage_path_exists
gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name)
end
it 'does not try to move nil repository over hashed' do
expect(gitlab_shell).not_to receive(:mv_repository).with(project.repository_storage_path, from_name, to_name)
expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
service.execute
end
end
end
def expect_move_repository(from_name, to_name)
expect(gitlab_shell).to receive(:mv_repository).with(project.repository_storage_path, from_name, to_name).and_call_original
end
end
end
require 'spec_helper'
describe Projects::HashedStorageMigrationService do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:project) { create(:project, :empty_repo, :wiki_repo) }
let(:service) { described_class.new(project) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
subject(:service) { described_class.new(project) }
describe '#execute' do
before do
allow(service).to receive(:gitlab_shell) { gitlab_shell }
end
context 'when succeeds' do
it 'renames project and wiki repositories' do
service.execute
context 'repository migration' do
let(:repository_service) { Projects::HashedStorage::MigrateRepositoryService.new(project, subject.logger) }
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_truthy
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_truthy
end
it 'delegates migration to Projects::HashedStorage::MigrateRepositoryService' do
expect(Projects::HashedStorage::MigrateRepositoryService).to receive(:new).with(project, subject.logger).and_return(repository_service)
expect(repository_service).to receive(:execute)
it 'updates project to be hashed and not read-only' do
service.execute
expect(project.hashed_storage?(:repository)).to be_truthy
expect(project.repository_read_only).to be_falsey
end
it 'move operation is called for both repositories' do
expect_move_repository(project.disk_path, hashed_storage.disk_path)
expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
it 'does not delegate migration if repository is already migrated' do
project.storage_version = ::Project::LATEST_STORAGE_VERSION
expect(Projects::HashedStorage::MigrateRepositoryService).not_to receive(:new)
service.execute
end
end
context 'when one move fails' do
it 'rollsback repositories to original name' do
from_name = project.disk_path
to_name = hashed_storage.disk_path
allow(service).to receive(:move_repository).and_call_original
allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only
context 'attachments migration' do
let(:attachments_service) { Projects::HashedStorage::MigrateAttachmentsService.new(project, subject.logger) }
expect(service).to receive(:rollback_folder_move).and_call_original
it 'delegates migration to Projects::HashedStorage::MigrateRepositoryService' do
expect(Projects::HashedStorage::MigrateAttachmentsService).to receive(:new).with(project, subject.logger).and_return(attachments_service)
expect(attachments_service).to receive(:execute)
service.execute
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_falsey
expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_falsey
end
context 'when rollback fails' do
before do
from_name = legacy_storage.disk_path
to_name = hashed_storage.disk_path
it 'does not delegate migration if attachments are already migrated' do
project.storage_version = ::Project::LATEST_STORAGE_VERSION
expect(Projects::HashedStorage::MigrateAttachmentsService).not_to receive(:new)
hashed_storage.ensure_storage_path_exists
gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name)
end
it 'does not try to move nil repository over hashed' do
expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
service.execute
end
service.execute
end
end
def expect_move_repository(from_name, to_name)
expect(gitlab_shell).to receive(:mv_repository).with(project.repository_storage_path, from_name, to_name).and_call_original
end
end
end
require 'spec_helper'
describe ProjectMigrateHashedStorageWorker do
describe ProjectMigrateHashedStorageWorker, :clean_gitlab_redis_shared_state do
describe '#perform' do
let(:project) { create(:project, :empty_repo) }
let(:pending_delete_project) { create(:project, :empty_repo, pending_delete: true) }
it 'skips when project no longer exists' do
nonexistent_id = 999999999999
context 'when have exclusive lease' do
before do
lease = subject.lease_for(project.id)
expect(::Projects::HashedStorageMigrationService).not_to receive(:new)
subject.perform(nonexistent_id)
end
allow(Gitlab::ExclusiveLease).to receive(:new).and_return(lease)
allow(lease).to receive(:try_obtain).and_return(true)
end
it 'skips when project no longer exists' do
nonexistent_id = 999999999999
expect(::Projects::HashedStorageMigrationService).not_to receive(:new)
subject.perform(nonexistent_id)
end
it 'skips when project is pending delete' do
expect(::Projects::HashedStorageMigrationService).not_to receive(:new)
it 'skips when project is pending delete' do
expect(::Projects::HashedStorageMigrationService).not_to receive(:new)
subject.perform(pending_delete_project.id)
end
subject.perform(pending_delete_project.id)
it 'delegates removal to service class' do
service = double('service')
expect(::Projects::HashedStorageMigrationService).to receive(:new).with(project, subject.logger).and_return(service)
expect(service).to receive(:execute)
subject.perform(project.id)
end
end
it 'delegates removal to service class' do
service = double('service')
expect(::Projects::HashedStorageMigrationService).to receive(:new).with(project, subject.logger).and_return(service)
expect(service).to receive(:execute)
context 'when dont have exclusive lease' do
before do
lease = subject.lease_for(project.id)
allow(Gitlab::ExclusiveLease).to receive(:new).and_return(lease)
allow(lease).to receive(:try_obtain).and_return(false)
end
it 'skips when dont have lease' do
expect(::Projects::HashedStorageMigrationService).not_to receive(:new)
subject.perform(project.id)
subject.perform(project.id)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment