Commit 7ed2aee7 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'tc-rake-orphan-artifacts-ee' into 'master'

Add rake task to clean orphan artifact files and Geo registries

See merge request gitlab-org/gitlab-ee!14140
parents 606812c1 79c7f66f
---
title: Add rake task to clean orphan artifact files
merge_request: 29681
author:
type: added
......@@ -504,6 +504,15 @@ To resolve this, run the following command:
sudo gitlab-rake geo:db:refresh_foreign_tables
```
## Expired artifacts
If you notice that there are more artifacts on the Geo secondary node
than on the Geo primary node, you can use the rake task
to [clean up orphan artifact files](../../../raketasks/cleanup.md#remove-orphan-artifact-files).
On a Geo **secondary** node, this command will also clean up all Geo
registry records related to the orphan files on disk.
## Fixing common errors
This section documents common errors reported in the Admin UI and how to fix them.
......
......@@ -92,3 +92,48 @@ I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote up
I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG
I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
```
## Remove orphan artifact files
When you notice there are more job artifact files on disk than there
should be, you can run:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files
```
This command:
- Scans through the entire artifacts folder.
- Checks which files still have a record in the database.
- If no database record is found, the file is deleted from disk.
By default, this task does not delete anything but shows what it can
delete. Run the command with `DRY_RUN=false` if you actually want to
delete the files:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files DRY_RUN=false
```
You can also limit the number of files to delete with `LIMIT`:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files LIMIT=100
```
This will only delete up to 100 files from disk. You can use this to
delete a small set for testing purposes.
If you provide `DEBUG=1`, you'll see the full path of every file that
is detected as being an orphan.
If `ionice` is installed, the task uses it to ensure the command is
not causing too much load on the disk. You can configure the niceness
level with `NICENESS`. Below are the valid levels, but consult
`man 1 ionice` to be sure.
- `0` or `None`
- `1` or `Realtime`
- `2` or `Best-effort` (default)
- `3` or `Idle`
......@@ -3,6 +3,10 @@
class Geo::JobArtifactRegistry < Geo::BaseRegistry
include Geo::Syncable
# Scope: registries whose `artifact_id` is one of the given +ids+.
def self.artifact_id_in(ids)
where(artifact_id: ids)
end
# Scope: registries whose `artifact_id` is NOT one of the given +ids+.
def self.artifact_id_not_in(ids)
where.not(artifact_id: ids)
end
......
# frozen_string_literal: true
module EE
  module Gitlab
    module Cleanup
      # EE extension of the orphan job artifact cleanup.
      #
      # On a Geo secondary node it additionally keeps a running total of the
      # Geo registry records handled by each batch and reports that total
      # once the run has finished.
      module OrphanJobArtifactFiles
        extend ::Gitlab::Utils::Override

        # Running total of Geo registry records reported by the batches.
        attr_accessor :total_geo_registries

        # Forwards every argument untouched to the core initializer.
        #
        # Keyword arguments are captured explicitly with `**` so that the
        # bare `super` re-sends them as keywords. With only `*`, Ruby 3
        # would forward them as a positional Hash, which a keyword-only
        # initializer rejects with an ArgumentError.
        def initialize(*, **)
          super

          @total_geo_registries = 0
        end

        # Runs the core cleanup, then logs how many Geo registry records
        # were handled when this node is a Geo secondary.
        override :run!
        def run!
          super

          if ::Gitlab::Geo.secondary?
            log_info("... and delete #{total_geo_registries} Geo registry records.")
          end
        end

        # Adds the batch's Geo registry count to the running total, on top of
        # the statistics the core implementation already tracks.
        override :update_stats!
        def update_stats!(batch)
          super

          if ::Gitlab::Geo.secondary?
            self.total_geo_registries += batch.geo_registries_count
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module EE
  module Gitlab
    module Cleanup
      # EE extension of a cleanup batch: on a Geo secondary node the Geo
      # registry records that belong to the batch's orphaned artifacts are
      # counted (dry run) or deleted (real run).
      module OrphanJobArtifactFilesBatch
        extend ::Gitlab::Utils::Override

        # Number of Geo registry records counted or deleted by #clean!.
        attr_accessor :geo_registries_count

        override :clean!
        def clean!
          super

          return unless ::Gitlab::Geo.secondary?

          clean_geo_registries!
        end

        # Stores how many registries match the orphans of this batch.
        # In a dry run they are only counted; otherwise they are deleted and
        # the result of `delete_all` is stored.
        def clean_geo_registries!
          registries = lost_and_found_geo_registries

          self.geo_registries_count = dry_run ? registries.count : registries.delete_all
        end

        # Artifact IDs of every orphan found in this batch (memoized).
        def lost_and_found_ids
          @lost_and_found_ids ||= lost_and_found.map { |orphan| orphan.artifact_id }
        end

        # Registry relation restricted to the orphaned artifact IDs (memoized).
        def lost_and_found_geo_registries
          @lost_and_found_geo_registries ||=
            ::Geo::JobArtifactRegistry.artifact_id_in(lost_and_found_ids)
        end
      end
    end
  end
end
......@@ -4,10 +4,21 @@ FactoryBot.define do
success true
trait :with_artifact do
after(:build, :stub) do |registry, _|
file = create(:ci_job_artifact)
transient do
artifact_type { nil } # e.g. :archive, :metadata, :trace ...
end
after(:build, :stub) do |registry, evaluator|
file = create(:ci_job_artifact, evaluator.artifact_type)
registry.artifact_id = file.id
end
end
# Registry pointing at an artifact that no longer has a database row:
# reuses :with_artifact to obtain an artifact_id, then removes that
# Ci::JobArtifact (via `delete`) once the registry has been created.
trait :orphan do
with_artifact
after(:create) do |registry, _|
Ci::JobArtifact.find(registry.artifact_id).delete
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the Geo-specific behaviour of a cleanup batch: on a secondary node,
# registries for orphaned artifact IDs are counted in a dry run and deleted
# otherwise.
describe Gitlab::Cleanup::OrphanJobArtifactFilesBatch do
include ::EE::GeoHelpers
let(:batch_size) { 10 }
# Dry run unless a context overrides it.
let(:dry_run) { true }
subject(:batch) { described_class.new(batch_size: batch_size, dry_run: dry_run) }
context 'Geo secondary' do
# IDs above the current maximum have no Ci::JobArtifact row, so the two
# registries below reference artifacts that do not exist in the database.
let(:max_artifact_id) { Ci::JobArtifact.maximum(:id).to_i }
let(:orphan_id_1) { max_artifact_id + 1 }
let(:orphan_id_2) { max_artifact_id + 2 }
let!(:orphan_registry_1) { create(:geo_job_artifact_registry, artifact_id: orphan_id_1) }
let!(:orphan_registry_2) { create(:geo_job_artifact_registry, artifact_id: orphan_id_2) }
before do
stub_secondary_node
# Only the last path segment matters: the batch parses it as the artifact ID.
batch << "/tmp/foo/bar/#{orphan_id_1}"
batch << "/tmp/foo/bar/#{orphan_id_2}"
end
context 'no dry run' do
let(:dry_run) { false }
it 'deletes registries for the found artifacts' do
expect { batch.clean! }.to change { Geo::JobArtifactRegistry.count }.by(-2)
expect(batch.geo_registries_count).to eq(2)
end
end
context 'with dry run' do
it 'does not remove registries' do
# Extra registries that were never added to the batch; they must not be
# included in the reported count.
create(:geo_job_artifact_registry, :with_artifact, artifact_type: :archive)
create(:geo_job_artifact_registry, :orphan, artifact_type: :archive)
expect { batch.clean! }.not_to change { Geo::JobArtifactRegistry.count }
expect(batch.geo_registries_count).to eq(2)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the Geo-specific reporting of the cleanup run: the extra log line and
# the accumulated registry total on a secondary node.
describe Gitlab::Cleanup::OrphanJobArtifactFiles do
include ::EE::GeoHelpers
let(:null_logger) { Logger.new('/dev/null') }
subject(:cleanup) { described_class.new(logger: null_logger) }
before do
# Allow any info message so examples can set expectations on specific ones.
allow(null_logger).to receive(:info)
end
context 'not a Geo secondary' do
it 'does not print cleaning Geo registries message' do
expect(null_logger).not_to receive(:info).with(/Geo/)
cleanup.run!
end
end
context 'Geo secondary', :geo do
before do
stub_secondary_node
end
it 'prints cleaning Geo registries message' do
expect(null_logger).to receive(:info).with(/delete \d+ Geo registry records/)
cleanup.run!
end
it 'accumulates the number of cleaned Geo registries' do
# A batch size of 2 forces the three orphans to be spread over several batches.
stub_const("#{described_class.name}::BATCH_SIZE", 2)
create_list(:geo_job_artifact_registry, 3, :orphan, artifact_type: :archive)
# An orphaned artifact without a registry.
# NOTE(review): presumably the artifact file stays on disk after `delete`,
# so the scan still finds it but no registry is counted for it - confirm.
create(:ci_job_artifact, :archive).delete
cleanup.run!
expect(cleanup.total_geo_registries).to eq(3)
end
end
end
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # Finds job artifact directories on disk that no longer have a matching
    # record in the database and removes them.
    #
    # The artifacts folder is scanned with `find` (wrapped in `ionice` when
    # that is available). Every directory found is added to a batch; a batch
    # is cleaned once it is full, and one final time when the scan ends.
    #
    # limit    - maximum number of orphans to clean (nil means no limit)
    # dry_run  - when true (the default) nothing is removed from disk
    # niceness - `ionice` scheduling class, defaults to DEFAULT_NICENESS
    # logger   - defaults to Rails.logger
    class OrphanJobArtifactFiles
      include Gitlab::Utils::StrongMemoize

      ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
      LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
      BATCH_SIZE = 500
      DEFAULT_NICENESS = 'Best-effort'

      attr_accessor :batch, :total_found, :total_cleaned
      attr_reader :limit, :dry_run, :niceness, :logger

      def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil)
        @limit = limit
        @dry_run = dry_run
        @niceness = niceness || DEFAULT_NICENESS
        @logger = logger || Rails.logger
        @total_found = @total_cleaned = 0

        new_batch!
      end

      # Scans the artifacts folder and cleans orphans batch by batch, then
      # logs a summary of what was processed.
      def run!
        log_info('Looking for orphan job artifacts to clean up')

        # A limit of zero (or less) counts as already reached: skip the scan
        # instead of trying to fill a batch that has no capacity.
        unless limit_reached?
          find_artifacts do |artifact_file|
            batch << artifact_file

            clean_batch! if batch.full?
            break if limit_reached?
          end
        end

        # Clean whatever is left in the last, partially filled batch.
        clean_batch!

        log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.")
      end

      private

      # Replaces the current batch with a fresh, empty one.
      def new_batch!
        self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
          .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
      end

      # Cleans the current batch, records its statistics and starts a new one.
      def clean_batch!
        batch.clean!

        update_stats!(batch)

        new_batch!
      end

      def update_stats!(batch)
        self.total_found += batch.artifact_files.count
        self.total_cleaned += batch.lost_and_found.count
      end

      def limit_reached?
        return false unless limit

        total_cleaned >= limit
      end

      # Capacity of the next batch. With a limit this never exceeds the
      # number of orphans that may still be cleaned; once the limit has been
      # reached it is 0, so the size is always an Integer the batch can
      # compare against.
      def batch_size
        return BATCH_SIZE unless limit
        return 0 if limit_reached?

        [BATCH_SIZE, limit - total_cleaned].min
      end

      # Runs the find command and yields the path of every artifact
      # directory it prints. Anything the command wrote to stderr is logged
      # as an error when it exits unsuccessfully.
      def find_artifacts
        Open3.popen3(*find_command) do |_stdin, stdout, stderr, status_thread|
          stdout.each_line do |line|
            # `find` ends every path with a newline. Strip it, otherwise the
            # yielded string does not name an existing directory and removing
            # it from disk silently does nothing.
            yield line.chomp
          end

          log_error(stderr.read.color(:red)) unless status_thread.value.success?
        end
      end

      def find_command
        strong_memoize(:find_command) do
          cmd = %W[find -L #{absolute_artifact_dir}]

          # Search for Job Artifact IDs, they are found 6 directory
          # levels deep. For example:
          # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
          #                  1  2  3       4          5   6
          #                  |  |  |       ^- date    |   ^- Job Artifact ID
          #                  |  |  |                  ^- Job ID
          #                  ^--+--+- components of hashed storage project path
          cmd += %w[-mindepth 6 -maxdepth 6]

          # Artifact directories are named on their ID
          cmd += %w[-type d]

          if ionice
            # \A and \z anchor the whole string; ^ and $ would only anchor a
            # single line and let a multi-line value through.
            raise ArgumentError, 'Invalid niceness' unless niceness.match?(/\A\w[\w\-]*\z/)

            cmd.unshift(*%W[#{ionice} --class #{niceness}])
          end

          log_info("find command: '#{cmd.join(' ')}'")

          cmd
        end
      end

      def absolute_artifact_dir
        File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
      end

      # Path of the `ionice` executable, or a falsy value when it is not found.
      def ionice
        strong_memoize(:ionice) do
          Gitlab::Utils.which('ionice')
        end
      end

      def log_info(msg, params = {})
        logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      def log_error(msg, params = {})
        logger.error(msg)
      end
    end
  end
end
# Prepend the EE module so that its overrides take precedence and can call
# `super` to reach the methods of the class defined above.
Gitlab::Cleanup::OrphanJobArtifactFiles.prepend(EE::Gitlab::Cleanup::OrphanJobArtifactFiles)
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # A bounded collection of artifact directories found on disk.
    #
    # Paths are appended with #<< until the batch is #full?. #clean! then
    # looks up which of them still have a Ci::JobArtifact record and removes
    # the remaining ones from disk, unless the batch is a dry run.
    class OrphanJobArtifactFilesBatch
      include Gitlab::Utils::StrongMemoize

      # Raised when a path is appended to a batch that is already full.
      BatchFull = Class.new(StandardError)

      # An artifact directory on disk, identified by its path.
      class ArtifactFile
        attr_accessor :path

        def initialize(path)
          @path = path
        end

        # The artifact ID is the integer value of the last path segment.
        def artifact_id
          segments = path.split('/')

          segments.last.to_i
        end
      end

      attr_reader :batch_size, :dry_run
      attr_accessor :artifact_files

      def initialize(batch_size:, dry_run: true, logger: Rails.logger)
        @artifact_files = []
        @logger = logger
        @dry_run = dry_run
        @batch_size = batch_size
      end

      # Processes every orphan of the batch; does nothing for an empty batch.
      def clean!
        return if artifact_files.empty?

        lost_and_found.each { |orphan| clean_one!(orphan) }
      end

      def full?
        artifact_files.size >= batch_size
      end

      # Appends a path to the batch.
      #
      # Raises BatchFull when the batch cannot take any more paths.
      def <<(artifact_path)
        if full?
          raise BatchFull, "Batch full! Already contains #{artifact_files.count} artifacts"
        end

        artifact_files << ArtifactFile.new(artifact_path)
      end

      # The artifact files of this batch without a database record (memoized).
      def lost_and_found
        strong_memoize(:lost_and_found) do
          ids_on_disk = artifact_files.map(&:artifact_id)
          ids_in_database = ::Ci::JobArtifact.id_in(ids_on_disk).pluck_primary_key

          artifact_files.select { |file| !ids_in_database.include?(file.artifact_id) }
        end
      end

      private

      # Logs the orphan and removes it from disk, except in a dry run.
      def clean_one!(artifact_file)
        log_debug("Found orphan job artifact file @ #{artifact_file.path}")

        return if dry_run

        remove_file!(artifact_file)
      end

      def remove_file!(artifact_file)
        FileUtils.rm_rf(artifact_file.path)
      end

      def log_info(msg, params = {})
        @logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      def log_debug(msg, params = {})
        @logger.debug(msg)
      end
    end
  end
end
# Prepend the EE module so that its overrides take precedence and can call
# `super` to reach the methods of the class defined above.
Gitlab::Cleanup::OrphanJobArtifactFilesBatch.prepend(EE::Gitlab::Cleanup::OrphanJobArtifactFilesBatch)
......@@ -115,6 +115,18 @@ namespace :gitlab do
end
end
desc 'GitLab | Cleanup | Clean orphan job artifact files'
task orphan_job_artifact_files: :gitlab_environment do
  warn_user_is_not_gitlab

  # Options are taken from the environment through the helper methods of
  # this namespace.
  Gitlab::Cleanup::OrphanJobArtifactFiles
    .new(limit: limit, dry_run: dry_run?, niceness: niceness, logger: logger)
    .run!

  # After a dry run, tell the operator how to perform the actual cleanup.
  logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
# True only when the REMOVE environment variable is exactly "true".
def remove?
  ENV.fetch('REMOVE', nil) == 'true'
end
......@@ -123,12 +135,25 @@ namespace :gitlab do
ENV['DRY_RUN'] != 'false'
end
# True when the DEBUG environment variable is set to a non-blank value.
def debug?
ENV['DEBUG'].present?
end
# Maximum number of files to clean, read from the LIMIT environment variable.
#
# Returns nil (no limit) when LIMIT is unset or blank, so that an empty
# `LIMIT=` is treated like a missing one instead of becoming a limit of 0.
# Any other value is converted with `to_i`.
def limit
  value = ENV['LIMIT'].to_s.strip

  value.empty? ? nil : value.to_i
end
# Value of the NICENESS environment variable, or nil when it is unset or blank.
def niceness
ENV['NICENESS'].presence
end
def logger
return @logger if defined?(@logger)
@logger = if Rails.env.development? || Rails.env.production?
Logger.new(STDOUT).tap do |stdout_logger|
stdout_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger))
stdout_logger.level = debug? ? Logger::DEBUG : Logger::INFO
end
else
Rails.logger
......
# frozen_string_literal: true
require 'spec_helper'
# Covers a single cleanup batch: which artifact directories are detected as
# orphans, and that files are only removed outside of a dry run.
describe Gitlab::Cleanup::OrphanJobArtifactFilesBatch do
let(:batch_size) { 10 }
# Dry run unless a context overrides it.
let(:dry_run) { true }
subject(:batch) { described_class.new(batch_size: batch_size, dry_run: dry_run) }
context 'no dry run' do
let(:dry_run) { false }
it 'deletes only orphan job artifacts from disk' do
job_artifact = create(:ci_job_artifact, :archive)
orphan_artifact = create(:ci_job_artifact, :archive)
batch << artifact_path(job_artifact)
batch << artifact_path(orphan_artifact)
# Remove only the database row, which turns the second artifact into an orphan.
orphan_artifact.delete
batch.clean!
expect(batch.artifact_files.count).to eq(2)
expect(batch.lost_and_found.count).to eq(1)
expect(batch.lost_and_found.first.artifact_id).to eq(orphan_artifact.id)
end
it 'does not mix up job ID and artifact ID' do
# take maximum ID of both tables to avoid any collision
max_id = [Ci::Build.maximum(:id), Ci::JobArtifact.maximum(:id)].compact.max.to_i
job_a = create(:ci_build, id: max_id + 1)
job_b = create(:ci_build, id: max_id + 2)
# reuse the build IDs for the job artifact IDs, but swap them
job_artifact_b = create(:ci_job_artifact, :archive, job: job_b, id: max_id + 1)
job_artifact_a = create(:ci_job_artifact, :archive, job: job_a, id: max_id + 2)
batch << artifact_path(job_artifact_a)
batch << artifact_path(job_artifact_b)
job_artifact_b.delete
batch.clean!
# Only the artifact whose own row was deleted may disappear from disk.
expect(File.exist?(job_artifact_a.file.path)).to be_truthy
expect(File.exist?(job_artifact_b.file.path)).to be_falsey
end
end
context 'with dry run' do
it 'does not remove files' do
job_artifact = create(:ci_job_artifact, :archive)
# NOTE(review): this adds the file path itself, whereas the other examples
# add the parent directory via artifact_path - confirm this is intended.
batch << job_artifact.file.path
job_artifact.delete
expect(batch).not_to receive(:remove_file!)
batch.clean!
expect(File.exist?(job_artifact.file.path)).to be_truthy
end
end
# Directory that contains the artifact's file, as a String.
def artifact_path(job_artifact)
Pathname.new(job_artifact.file.path).parent.to_s
end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the cleanup run as a whole: option passing, the limit, and batching.
describe Gitlab::Cleanup::OrphanJobArtifactFiles do
let(:null_logger) { Logger.new('/dev/null') }
subject(:cleanup) { described_class.new(logger: null_logger) }
before do
# Allow any info message so examples can set expectations on specific ones.
allow(null_logger).to receive(:info)
end
it 'passes on dry_run' do
expect(Gitlab::Cleanup::OrphanJobArtifactFilesBatch)
.to receive(:new)
.with(dry_run: false, batch_size: anything, logger: anything)
.at_least(:once)
.and_call_original
described_class.new(dry_run: false).run!
end
it 'errors when invalid niceness is given' do
cleanup = described_class.new(logger: null_logger, niceness: 'FooBar')
# NOTE(review): the logged error presumably comes from `ionice` rejecting
# the class name, so this example appears to need `ionice` installed - confirm.
expect(null_logger).to receive(:error).with(/FooBar/)
cleanup.run!
end
it 'finds artifacts on disk' do
artifact = create(:ci_job_artifact, :archive)
expect(cleanup).to receive(:find_artifacts).and_yield(artifact.file.path)
cleanup.run!
end
it 'stops when limit is reached' do
cleanup = described_class.new(limit: 1)
mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2')
cleanup.run!
# Only the first of the two yielded paths may have been processed.
expect(cleanup.total_found).to eq(1)
end
it 'cleans even if batch is not full' do
mock_artifacts_found(cleanup, 'tmp/foo/bar/1')
expect(cleanup).to receive(:clean_batch!).and_call_original
cleanup.run!
end
it 'cleans in batches' do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2', 'tmp/foo/bar/3')
# One call for the full batch of two, one for the remaining path.
expect(cleanup).to receive(:clean_batch!).twice.and_call_original
cleanup.run!
end
# Stubs find_artifacts so that it yields the given paths in order.
def mock_artifacts_found(cleanup, *files)
mock = allow(cleanup).to receive(:find_artifacts)
files.each { |file| mock.and_yield(file) }
end
end
......@@ -156,4 +156,33 @@ describe 'gitlab:cleanup rake tasks' do
end
end
end
# Checks that the rake task builds the cleaner and passes DRY_RUN on to it.
describe 'gitlab:cleanup:orphan_job_artifact_files' do
subject(:rake_task) { run_rake_task('gitlab:cleanup:orphan_job_artifact_files') }
it 'runs the task without errors' do
expect(Gitlab::Cleanup::OrphanJobArtifactFiles)
.to receive(:new).and_call_original
expect { rake_task }.not_to raise_error
end
context 'with DRY_RUN set to false' do
before do
stub_env('DRY_RUN', 'false')
end
it 'passes dry_run correctly' do
# Only dry_run is pinned; the other options may take any value.
expect(Gitlab::Cleanup::OrphanJobArtifactFiles)
.to receive(:new)
.with(limit: anything,
dry_run: false,
niceness: anything,
logger: anything)
.and_call_original
rake_task
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment