Commit 9a2cb68d authored by Gabriel Mazetto's avatar Gabriel Mazetto

Improve storage migration rake task

parent 7ac92899
...@@ -277,8 +277,9 @@ class Project < ActiveRecord::Base ...@@ -277,8 +277,9 @@ class Project < ActiveRecord::Base
scope :pending_delete, -> { where(pending_delete: true) } scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) } scope :without_deleted, -> { where(pending_delete: false) }
scope :with_hashed_storage, -> { where('storage_version >= 1') } scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_legacy_storage, -> { where(storage_version: [nil, 0]) } scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) }
scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) } scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) }
scope :sorted_by_stars, -> { reorder('projects.star_count DESC') } scope :sorted_by_stars, -> { reorder('projects.star_count DESC') }
......
...@@ -2,10 +2,10 @@ namespace :gitlab do ...@@ -2,10 +2,10 @@ namespace :gitlab do
namespace :storage do namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage' desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do task migrate_to_hashed: :environment do
legacy_projects_count = Project.with_legacy_storage.count legacy_projects_count = Project.with_unmigrated_storage.count
if legacy_projects_count == 0 if legacy_projects_count == 0
puts 'There are no projects using legacy storage. Nothing to do!' puts 'There are no projects requiring storage migration. Nothing to do!'
next next
end end
...@@ -23,22 +23,42 @@ namespace :gitlab do ...@@ -23,22 +23,42 @@ namespace :gitlab do
desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage' desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage'
task legacy_projects: :environment do task legacy_projects: :environment do
projects_summary(Project.with_legacy_storage) relation_summary('projects', Project.without_storage_feature(:repository))
end end
desc 'Gitlab | Storage | List existing projects using Legacy Storage' desc 'Gitlab | Storage | List existing projects using Legacy Storage'
task list_legacy_projects: :environment do task list_legacy_projects: :environment do
projects_list(Project.with_legacy_storage) projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository))
end end
desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage' desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage'
task hashed_projects: :environment do task hashed_projects: :environment do
projects_summary(Project.with_hashed_storage) relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository))
end end
desc 'Gitlab | Storage | List existing projects using Hashed Storage' desc 'Gitlab | Storage | List existing projects using Hashed Storage'
task list_hashed_projects: :environment do task list_hashed_projects: :environment do
projects_list(Project.with_hashed_storage) projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
task legacy_attachments: :environment do
relation_summary('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Legacy Storage'
task list_legacy_attachments: :environment do
attachments_list('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage'
task hashed_attachments: :environment do
relation_summary('attachments using Hashed Storage', hashed_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Hashed Storage'
task list_hashed_attachments: :environment do
attachments_list('attachments using Hashed Storage', hashed_attachments_relation)
end end
def batch_size def batch_size
...@@ -46,29 +66,43 @@ namespace :gitlab do ...@@ -46,29 +66,43 @@ namespace :gitlab do
end end
def project_id_batches(&block) def project_id_batches(&block)
Project.with_legacy_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id) ids = relation.pluck(:id)
yield ids.min, ids.max yield ids.min, ids.max
end end
end end
def projects_summary(relation) def legacy_attachments_relation
projects_count = relation.count Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
puts "* Found #{projects_count} projects".color(:green) JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def relation_summary(relation_name, relation)
relation_count = relation.count
puts "* Found #{relation_count} #{relation_name}".color(:green)
projects_count relation_count
end end
def projects_list(relation) def projects_list(relation_name, relation)
projects_count = projects_summary(relation) relation_count = relation_summary(relation_name, relation)
projects = relation.with_route projects = relation.with_route
limit = ENV.fetch('LIMIT', 500).to_i limit = ENV.fetch('LIMIT', 500).to_i
return unless projects_count > 0 return unless relation_count > 0
puts " ! Displaying first #{limit} projects..." if projects_count > limit puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0 counter = 0
projects.find_in_batches(batch_size: batch_size) do |batch| projects.find_in_batches(batch_size: batch_size) do |batch|
...@@ -81,5 +115,26 @@ namespace :gitlab do ...@@ -81,5 +115,26 @@ namespace :gitlab do
end end
end end
end end
def attachments_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
limit = ENV.fetch('LIMIT', 500).to_i
return unless relation_count > 0
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
relation.find_in_batches(batch_size: batch_size) do |batch|
batch.each do |upload|
counter += 1
puts " - #{upload.path} (id: #{upload.id})".color(:red)
return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator
end
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment