Commit b3139aa7 authored by Micaël Bergeron

make the rake task easier to test

parent 38bf711b
...@@ -70,6 +70,12 @@ class Upload < ActiveRecord::Base ...@@ -70,6 +70,12 @@ class Upload < ActiveRecord::Base
store == ObjectStorage::Store::LOCAL store == ObjectStorage::Store::LOCAL
end end
private
# Truthy only when no checksum is stored yet, the upload is `local?`
# and `exist?` holds.
def checksummable?
  return false unless checksum.nil?

  local? && exist?
end
def foreground_checksummable? def foreground_checksummable?
checksummable? && size <= CHECKSUM_THRESHOLD checksummable? && size <= CHECKSUM_THRESHOLD
end end
......
module Gitlab module Gitlab
module Utils module Utils
include BisectEnumerable
extend self extend self
# Run system command without outputting to stdout. # Run system command without outputting to stdout.
......
module Gitlab
  module Utils
    module BisectEnumerable
      extend self

      # Bisect an enumerable by using &block as pivot.
      # Return two arrays, depending on the result of the pivot:
      # the first holds the elements for which the pivot is truthy,
      # the second holds the elements for which it is falsy.
      # [e] -> [[e]: pivot(e) is truthy, [e]: pivot(e) is falsy]
      #
      # An empty enumerable yields [[], []]. Elements that are themselves
      # nil or false are bisected like any other element.
      #
      # Example: odd, even = bisect((1..10), &:odd?)
      def bisect(enumerable, &block)
        # Enumerable#partition already returns [[], []] for empty input, so no
        # emptiness guard is needed (a bare `any?` guard would also misreport
        # collections that contain only nil/false elements as empty).
        enumerable.partition(&block)
      end
    end
  end
end
namespace :gitlab do
namespace :uploads do
# Walks every upload batch and, for each upload, prints its path and then
# either verifies its checksum or reports that the file is missing.
desc 'GitLab | Uploads | Check integrity of uploaded files'
task check: :environment do
  puts 'Checking integrity of uploaded files'

  uploads_batches do |uploads|
    uploads.each do |record|
      puts "- Checking file (#{record.id}): #{record.absolute_path}".color(:green)

      unless record.exist?
        puts " * File does not exist on the file system".color(:red)
        next
      end

      check_checksum(record)
    end
  end

  puts 'Done!'
end
# Migrates every upload matching the given uploader class and model class
# that is not yet in MIGRATE_TO_STORE, batch by batch, printing a report per
# batch and a final summary.
#
# Task arguments:
#   uploader_class - name of the uploader class (constantized)
#   model_class    - name of the model class (constantized)
#   mounted_as     - optional mount point; any ':' is stripped before it is
#                    turned into a symbol
desc 'GitLab | Uploads | Migrate the uploaded files to object storage'
task :migrate, [:uploader_class, :model_class, :mounted_as] => :environment do |task, args|
  # Destination store for every migrated upload.
  MIGRATE_TO_STORE = ObjectStorage::Store::REMOTE

  # Outcome of migrating one upload; `error` stays nil on success.
  class MigrationResult
    attr_reader :upload
    attr_accessor :error

    def initialize(upload, error = nil)
      @upload, @error = upload, error
    end

    def success?
      error.nil?
    end

    # Human-readable outcome, used as the per-failure line in the report.
    def to_s
      success? ? "Migration successful." : "Error while migrating #{upload.id}: #{error.message}"
    end
  end

  uploader_class = args.uploader_class.constantize
  model_class = args.model_class.constantize
  mounted_as = args.mounted_as&.gsub(':', '')&.to_sym

  # Accumulates the results of every batch for the final summary.
  global_results = []

  Upload.preload(:model)
    .where.not(store: MIGRATE_TO_STORE)
    .where(uploader: uploader_class.to_s,
           model_type: model_class.to_s)
    .in_batches(of: batch_size) do |batch|
    results = migrate(build_uploaders(batch, mounted_as))
    report(results)
    global_results.concat(results)
  end

  puts "\n === Migration summary ==="
  report(global_results)
end
# Print a one-line summary of +results+ followed by one line per failure.
#
# results - Enumerable of objects responding to #success?; failed entries
#           are printed through their #to_s.
#
# The summary is green when nothing failed and red otherwise.
def report(results)
  success, errors = results.partition(&:success?)

  batch_color = errors.empty? ? :green : :red
  puts "Migrated #{success.count}/#{success.count + errors.count} files.".color(batch_color)

  # Colorize the string, then print it: `puts` returns nil, so chaining
  # `.color` onto the `puts(...)` call would raise NoMethodError.
  errors.each { |e| puts "\t#{e}".color(:red) }
end
# Builds one uploader per upload by calling `build_uploader(mounted_as)` on
# each element of +uploads+; returns them in the same order.
def build_uploaders(uploads, mounted_as)
  uploads.map do |record|
    record.build_uploader(mounted_as)
  end
end
# Migrates each uploader to MIGRATE_TO_STORE and returns one MigrationResult
# per uploader, in order. A CarrierWave::UploadError is recorded on the
# result instead of being raised; any other exception propagates.
def migrate(uploaders)
  uploaders.map do |uploader|
    outcome = MigrationResult.new(uploader.upload)

    begin
      uploader.migrate!(MIGRATE_TO_STORE)
    rescue CarrierWave::UploadError => failure
      outcome.error = failure
    end

    outcome
  end
end
# Batch size read from the BATCH environment variable (200 when it is not
# set), converted with #to_i.
def batch_size
  configured = ENV.fetch('BATCH') { 200 }
  configured.to_i
end
# Hex-encoded SHA256 digest of the file located at +absolute_path+.
def calculate_checksum(absolute_path)
  digest = Digest::SHA256.file(absolute_path)
  digest.hexdigest
end
# Recomputes the checksum of the upload's file and prints a red warning when
# it differs from `upload.checksum`; prints nothing when they match.
def check_checksum(upload)
  actual = calculate_checksum(upload.absolute_path)
  return if actual == upload.checksum

  puts " * File checksum (#{actual}) does not match the one in the database (#{upload.checksum})".color(:red)
end
# Yields every batch produced by `Upload.all.in_batches`, using `batch_size`
# as the batch size and the ID_FROM / ID_TO environment variables as the
# `start:` / `finish:` options.
def uploads_batches(&block)
  id_range = { start: ENV['ID_FROM'], finish: ENV['ID_TO'] }

  Upload.all.in_batches(of: batch_size, **id_range) do |relation| # rubocop: disable Cop/InBatches
    yield relation
  end
end
end
end
require_relative 'helpers.rb'
include UploadTaskHelpers
namespace :gitlab do
namespace :uploads do
# Walks every upload batch and, for each upload, prints its path and then
# either verifies its checksum or reports that the file is missing. An
# ObjectStorage::RemoteStoreError raised while handling an upload is reported
# as "stored remotely" and that upload is skipped.
desc 'GitLab | Uploads | Check integrity of uploaded files'
task check: :environment do
  puts 'Checking integrity of uploaded files'

  uploads_batches do |uploads|
    uploads.each do |record|
      begin
        heading = "- Checking file (#{record.id}): #{record.absolute_path}"
        puts heading.color(:green)

        unless record.exist?
          puts " * File does not exist on the file system".color(:red)
          next
        end

        check_checksum(record)
      rescue ObjectStorage::RemoteStoreError
        puts "- File (#{record.id}): File is stored remotely, skipping".color(:yellow)
      end
    end
  end

  puts 'Done!'
end
end
end
# Helper methods mixed into the uploads rake tasks.
module UploadTaskHelpers
  # Batch size read from the BATCH environment variable (200 when it is not
  # set), converted with #to_i.
  def batch_size
    configured = ENV.fetch('BATCH') { 200 }
    configured.to_i
  end

  # Hex-encoded SHA256 digest of the file located at +absolute_path+.
  def calculate_checksum(absolute_path)
    digest = Digest::SHA256.file(absolute_path)
    digest.hexdigest
  end

  # Recomputes the checksum of the upload's file and prints a red warning
  # when it differs from `upload.checksum`; prints nothing when they match.
  def check_checksum(upload)
    actual = calculate_checksum(upload.absolute_path)
    return if actual == upload.checksum

    puts " * File checksum (#{actual}) does not match the one in the database (#{upload.checksum})".color(:red)
  end

  # Yields every batch produced by `Upload.all.in_batches`, using
  # `batch_size` as the batch size and the ID_FROM / ID_TO environment
  # variables as the `start:` / `finish:` options.
  def uploads_batches(&block)
    id_range = { start: ENV['ID_FROM'], finish: ENV['ID_TO'] }

    Upload.all.in_batches(of: batch_size, **id_range) do |relation| # rubocop: disable Cop/InBatches
      yield relation
    end
  end
end
require 'gitlab/utils/bisect_enumerable.rb'
require_relative 'helpers.rb'
module UploadTask
  module Migrate
    # Outcome of migrating a single upload; `error` stays nil on success.
    class MigrationResult
      attr_reader :upload
      attr_accessor :error

      def initialize(upload, error = nil)
        @upload, @error = upload, error
      end

      def success?
        error.nil?
      end

      # Human-readable outcome, used as the per-failure line of the report.
      def to_s
        success? ? "Migration successful." : "Error while migrating #{upload.id}: #{error.message}"
      end
    end

    # Formats and prints a summary for a list of MigrationResult-like objects
    # (anything responding to #success? and #to_s).
    class Reporter
      def initialize(results = [])
        @success, @failures = Gitlab::Utils::BisectEnumerable.bisect(results, &:success?)
      end

      # Prints the summary line followed by the failure lines.
      def report
        puts header
        puts failures
      end

      # Summary line: green when nothing failed, red otherwise.
      def header
        color = @failures.empty? ? :green : :red

        "Migrated #{@success.count}/#{@success.count + @failures.count} files.".color(color)
      end

      # One red, tab-indented line per failure, separated by real newlines
      # (a single-quoted '\n' would emit a literal backslash followed by "n").
      def failures
        @failures.map { |f| "\t#{f}".color(:red) }.join("\n")
      end
    end

    # Migrates, batch by batch, every upload of the given uploader/model
    # class that is not already in `to_store`, and collects the results.
    class Migrator
      attr_reader :to_store

      # uploader_class - uploader class; its name is matched against `uploader`
      # model_class    - model class; its name is matched against `model_type`
      # mounted_as     - mount point handed to `Upload#build_uploader`
      # to_store       - destination store handed to `migrate!`
      def initialize(uploader_class, model_class, mounted_as, to_store)
        @results = []
        @uploader_class, @model_class = uploader_class, model_class
        @mounted_as = mounted_as
        @to_store = to_store
      end

      def build_uploaders(uploads)
        uploads.map { |upload| upload.build_uploader(@mounted_as) }
      end

      # Processes every batch; when a block is given it receives each
      # processed batch as [MigrationResult]. All results are also kept for
      # #report.
      def migrate(batch_size, &block)
        each_upload_batch(batch_size) do |batch|
          results = build_uploaders(batch).map(&method(:process_uploader))

          yield results if block_given?
          @results.concat(results)
        end
      end

      # Prints the summary over every batch processed so far.
      def report
        Reporter.new(@results).report
      end

      def each_upload_batch(batch_size, &block)
        Upload.preload(:model)
          .where.not(store: @to_store)
          .where(uploader: @uploader_class.to_s,
                 model_type: @model_class.to_s)
          .in_batches(of: batch_size, &block)
      end

      # Migrates one uploader; any StandardError is recorded on the returned
      # result instead of being raised, so one failure does not stop the batch.
      def process_uploader(uploader)
        result = MigrationResult.new(uploader.upload)

        begin
          uploader.migrate!(@to_store)
        rescue => e
          result.error = e
        end

        result
      end
    end
  end
end
namespace :gitlab do
namespace :uploads do
# Migrates every upload of the given uploader/model class to object storage,
# printing a report after each batch and a summary at the end.
#
# Task arguments:
#   uploader_class - name of the uploader class (constantized)
#   model_class    - name of the model class (constantized)
#   mounted_as     - optional mount point; any ':' is stripped before it is
#                    turned into a symbol
desc 'GitLab | Uploads | Migrate the uploaded files to object storage'
task :migrate, [:uploader_class, :model_class, :mounted_as] => :environment do |task, args|
  uploader_class = args.uploader_class.constantize
  model_class    = args.model_class.constantize
  mounted_as     = args.mounted_as&.gsub(':', '')&.to_sym

  # The destination is the REMOTE (object storage) store. The Migrator
  # selects uploads whose store is NOT the destination and moves them into
  # it, so passing LOCAL here would run the migration in the wrong direction.
  migrator = UploadTask::Migrate::Migrator.new(
    uploader_class,
    model_class,
    mounted_as,
    ObjectStorage::Store::REMOTE
  )

  migrator.migrate(batch_size) do |results|
    UploadTask::Migrate::Reporter.new(results).report
  end

  puts "\n === Migration summary ==="
  migrator.report
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment