Commit aff097e8 authored by Grzegorz Bizon

Merge branch 'migrate-old-artifacts' into 'master'

Fix data inconsistency issue for old artifacts by moving them to a currently used path

Closes #32036

See merge request !11824
parents 6f14a3ea 6185d12c
......@@ -255,38 +255,6 @@ module Ci
Time.now - updated_at > 15.minutes.to_i
end
##
# Deprecated
#
# Carries a hotfix for CI build data integrity, see #4246.
#
# `ArtifactUploader` calls this to build its store_dir, and it does so both
# before AND after the file has been stored.
#
# Returns the relative artifacts directory of this build. The legacy
# directory (keyed by the project's `ci_id`) is returned only when the
# project has a `ci_id` and that directory already exists on disk; in every
# other case the directory keyed by the project's `id` is returned.
#
def artifacts_path
  # A soft-deleted project is still needed here: really deleting a project
  # also deletes its builds, and deleting builds requires locating their
  # artifacts, whose location depends on the project's data.
  owner = project || unscoped_project

  month_dir = created_at.utc.strftime('%Y_%m')
  build_dir = id.to_s

  legacy_relative = File.join(month_dir, owner.ci_id.to_s, build_dir)
  legacy_absolute = File.join(ArtifactUploader.artifacts_path, legacy_relative)

  if owner.ci_id && File.directory?(legacy_absolute)
    legacy_relative
  else
    File.join(month_dir, owner.id.to_s, build_dir)
  end
end
# Checks the given token against this record's own token using
# ActiveSupport's variable-size secure comparison.
#
# Returns the (falsy) stored token when no token is set, otherwise the
# result of the comparison.
def valid_token?(token)
  expected = self.token
  expected && ActiveSupport::SecurityUtils.variable_size_secure_compare(token, expected)
end
......
class ArtifactUploader < GitlabUploader
storage :file
attr_accessor :build, :field
attr_reader :job, :field
def self.artifacts_path
def self.local_artifacts_store
Gitlab.config.artifacts.path
end
def self.artifacts_upload_path
File.join(self.artifacts_path, 'tmp/uploads/')
File.join(self.local_artifacts_store, 'tmp/uploads/')
end
# Path of the `tmp/cache/` directory inside the artifacts path.
def self.artifacts_cache_path
  root = artifacts_path
  File.join(root, 'tmp/cache/')
end
def initialize(build, field)
@build, @field = build, field
def initialize(job, field)
@job, @field = job, field
end
def store_dir
File.join(self.class.artifacts_path, @build.artifacts_path)
default_local_path
end
def cache_dir
File.join(self.class.artifacts_cache_path, @build.artifacts_path)
File.join(self.class.local_artifacts_store, 'tmp/cache')
end
private
# Joins the class-level local artifacts store with `default_path`.
def default_local_path
  store_root = self.class.local_artifacts_store
  File.join(store_root, default_path)
end
def filename
file.try(:filename)
def default_path
File.join(job.created_at.utc.strftime('%Y_%m'), job.project_id.to_s, job.id.to_s)
end
end
......@@ -10,7 +10,11 @@ class GitlabUploader < CarrierWave::Uploader::Base
delegate :base_dir, to: :class
def file_storage?
self.class.storage == CarrierWave::Storage::File
storage.is_a?(CarrierWave::Storage::File)
end
# True when `cache_storage` is a CarrierWave::Storage::File instance.
def file_cache_storage?
  CarrierWave::Storage::File === cache_storage
end
# Reduce disk IO
......
---
title: Migrate artifacts to a new path
merge_request:
author:
# Moves build artifacts from the legacy directory keyed by the project's
# `ci_id` (`<YYYY_MM>/<ci_id>/<build id>`) to the directory keyed by
# `project_id` (`<YYYY_MM>/<project_id>/<build id>`), both under
# `Gitlab.config.artifacts.path`.
class MigrateOldArtifacts < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  # This uses special heuristic to find potential candidates for data migration
  # Read more about this here: https://gitlab.com/gitlab-org/gitlab-ce/issues/32036#note_30422345
  def up
    builds_with_artifacts.find_each(&:migrate_artifacts!)
  end

  # No-op: this migration does not move files back.
  def down
  end

  private

  # Builds that have an artifacts file, belong to a project with a non-NULL
  # `ci_id`, and whose id is lower than `min_id`. Only the columns needed to
  # compute the source and target directories are selected.
  def builds_with_artifacts
    projects_join = 'JOIN projects ON projects.id = ci_builds.project_id'

    Build.with_artifacts
      .joins(projects_join)
      .where('ci_builds.id < ?', min_id)
      .where('projects.ci_id IS NOT NULL')
      .select('id', 'created_at', 'project_id', 'projects.ci_id AS ci_id')
  end

  # Smallest build id among builds whose project has a NULL `ci_id`,
  # or 0 when no such build exists.
  def min_id
    lowest_ids = Build.joins('JOIN projects ON projects.id = ci_builds.project_id')
      .where('projects.ci_id IS NULL')
      .pluck('coalesce(min(ci_builds.id), 0)')

    lowest_ids.first
  end

  # Minimal model bound to `ci_builds`, local to this migration.
  class Build < ActiveRecord::Base
    self.table_name = 'ci_builds'

    scope :with_artifacts, -> { where.not(artifacts_file: [nil, '']) }

    # Moves the legacy directory to the target location. Does nothing when
    # the legacy directory is missing or the target already exists.
    def migrate_artifacts!
      return if !File.exist?(source_artifacts_path) || File.exist?(target_artifacts_path)

      ensure_target_path
      FileUtils.move(source_artifacts_path, target_artifacts_path)
    end

    private

    # Legacy directory, keyed by the `ci_id` selected from the projects table.
    def source_artifacts_path
      @source_artifacts_path ||= artifacts_dir_for(ci_id)
    end

    # Target directory, keyed by `project_id`.
    def target_artifacts_path
      @target_artifacts_path ||= artifacts_dir_for(project_id)
    end

    # `<artifacts path>/<YYYY_MM of created_at in UTC>/<owner_id>/<build id>`
    def artifacts_dir_for(owner_id)
      File.join(Gitlab.config.artifacts.path,
                created_at.utc.strftime('%Y_%m'),
                owner_id.to_s,
                id.to_s)
    end

    # Creates the parent of the target directory when it does not exist yet.
    def ensure_target_path
      parent = File.dirname(target_artifacts_path)
      FileUtils.mkdir_p(parent) unless Dir.exist?(parent)
    end
  end
end
......@@ -311,6 +311,16 @@ module API
end
end
# Responds with the given artifacts file:
# - `not_found!` when the file does not exist,
# - `present_file!` with its path and filename when it uses file storage,
# - a redirect to its URL otherwise.
def present_artifacts!(artifacts_file)
  if !artifacts_file.exists?
    not_found!
  elsif artifacts_file.file_storage?
    present_file!(artifacts_file.path, artifacts_file.filename)
  else
    redirect_to(artifacts_file.url)
  end
end
private
def private_token
......
......@@ -224,16 +224,6 @@ module API
find_build(id) || not_found!
end
# Responds with the given artifacts file:
# - a redirect to its URL when it does not use file storage,
# - `present_file!` with its path and filename when it exists,
# - `not_found!` otherwise.
def present_artifacts!(artifacts_file)
  # Redirect using the argument itself; no `build` is in scope in this helper.
  return redirect_to(artifacts_file.url) unless artifacts_file.file_storage?
  return not_found! unless artifacts_file.exists?

  present_file!(artifacts_file.path, artifacts_file.filename)
end
def filter_builds(builds, scope)
return builds if scope.nil? || scope.empty?
......
......@@ -241,16 +241,7 @@ module API
get '/:id/artifacts' do
job = authenticate_job!
artifacts_file = job.artifacts_file
unless artifacts_file.file_storage?
return redirect_to job.artifacts_file.url
end
unless artifacts_file.exists?
not_found!
end
present_file!(artifacts_file.path, artifacts_file.filename)
present_artifacts!(job.artifacts_file)
end
end
end
......
......@@ -225,16 +225,6 @@ module API
find_build(id) || not_found!
end
# Responds with the given artifacts file:
# - a redirect to its URL when it does not use file storage,
# - `present_file!` with its path and filename when it exists,
# - `not_found!` otherwise.
def present_artifacts!(artifacts_file)
  # Redirect using the argument itself; no `build` is in scope in this helper.
  return redirect_to(artifacts_file.url) unless artifacts_file.file_storage?
  return not_found! unless artifacts_file.exists?

  present_file!(artifacts_file.path, artifacts_file.filename)
end
def filter_builds(builds, scope)
return builds if scope.nil? || scope.empty?
......
......@@ -3,7 +3,7 @@ require 'backup/files'
module Backup
class Artifacts < Files
def initialize
super('artifacts', ArtifactUploader.artifacts_path)
super('artifacts', ArtifactUploader.local_artifacts_store)
end
def create_files_dir
......
......@@ -187,14 +187,14 @@ module Ci
build = authenticate_build!
artifacts_file = build.artifacts_file
unless artifacts_file.file_storage?
return redirect_to build.artifacts_file.url
end
unless artifacts_file.exists?
not_found!
end
unless artifacts_file.file_storage?
return redirect_to build.artifacts_file.url
end
present_file!(artifacts_file.path, artifacts_file.filename)
end
......
# encoding: utf-8
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170523083112_migrate_old_artifacts.rb')
describe MigrateOldArtifacts do
let(:migration) { described_class.new }
let!(:directory) { Dir.mktmpdir }
before do
allow(Gitlab.config.artifacts).to receive(:path).and_return(directory)
end
after do
FileUtils.remove_entry_secure(directory)
end
context 'with migratable data' do
let(:project1) { create(:empty_project, ci_id: 2) }
let(:project2) { create(:empty_project, ci_id: 3) }
let(:project3) { create(:empty_project) }
let(:pipeline1) { create(:ci_empty_pipeline, project: project1) }
let(:pipeline2) { create(:ci_empty_pipeline, project: project2) }
let(:pipeline3) { create(:ci_empty_pipeline, project: project3) }
let!(:build_with_legacy_artifacts) { create(:ci_build, pipeline: pipeline1) }
let!(:build_without_artifacts) { create(:ci_build, pipeline: pipeline1) }
let!(:build2) { create(:ci_build, :artifacts, pipeline: pipeline2) }
let!(:build3) { create(:ci_build, :artifacts, pipeline: pipeline3) }
before do
store_artifacts_in_legacy_path(build_with_legacy_artifacts)
end
it "legacy artifacts are not accessible" do
expect(build_with_legacy_artifacts.artifacts?).to be_falsey
end
it "legacy artifacts are set" do
expect(build_with_legacy_artifacts.artifacts_file_identifier).not_to be_nil
end
describe '#min_id' do
subject { migration.send(:min_id) }
it 'returns the newest build for which ci_id is not defined' do
is_expected.to eq(build3.id)
end
end
describe '#builds_with_artifacts' do
subject { migration.send(:builds_with_artifacts).map(&:id) }
it 'returns a list of builds that has artifacts and could be migrated' do
is_expected.to contain_exactly(build_with_legacy_artifacts.id, build2.id)
end
end
describe '#up' do
context 'when migrating artifacts' do
before do
migration.up
end
it 'all files do have artifacts' do
  # Iterate explicitly: a block handed straight to the scope is never
  # yielded to, so the expectation inside it would silently not run.
  Ci::Build.with_artifacts.each do |build|
    # `artifacts?` is the predicate this spec already relies on for the
    # legacy build before migration.
    expect(build.artifacts?).to be_truthy
  end
end
it 'artifacts are no longer present on legacy path' do
expect(File.exist?(legacy_path(build_with_legacy_artifacts))).to eq(false)
end
end
context 'when there are aritfacts in old and new directory' do
before do
store_artifacts_in_legacy_path(build2)
migration.up
end
it 'does not move old files' do
expect(File.exist?(legacy_path(build2))).to eq(true)
end
end
end
private
# Copies the artifacts fixtures into the build's legacy directory, points
# the build's artifacts columns at them, and reloads the build.
def store_artifacts_in_legacy_path(build)
  target_dir = legacy_path(build)
  FileUtils.mkdir_p(target_dir)

  fixtures = {
    'spec/fixtures/ci_build_artifacts.zip' => "ci_build_artifacts.zip",
    'spec/fixtures/ci_build_artifacts_metadata.gz' => "ci_build_artifacts_metadata.gz"
  }
  fixtures.each do |fixture, file_name|
    FileUtils.copy(Rails.root.join(fixture), File.join(target_dir, file_name))
  end

  # update_columns is used as in the original helper; the files above were
  # copied by hand rather than through the uploader.
  build.update_columns(
    artifacts_file: 'ci_build_artifacts.zip',
    artifacts_metadata: 'ci_build_artifacts_metadata.gz')

  build.reload
end
# Legacy artifacts directory of a build:
# `<directory>/<YYYY_MM of created_at in UTC>/<project ci_id>/<build id>`.
def legacy_path(build)
  segments = [
    directory,
    build.created_at.utc.strftime('%Y_%m'),
    build.project.ci_id.to_s,
    build.id.to_s
  ]

  File.join(*segments)
end
end
end
require 'rails_helper'
# Specs for the path helpers of ArtifactUploader.
describe ArtifactUploader do
  let(:job) { create(:ci_build) }
  let(:uploader) { described_class.new(job, :artifacts_file) }
  let(:path) { Gitlab.config.artifacts.path }

  describe '.local_artifacts_store' do
    subject { described_class.local_artifacts_store }

    it "delegate to artifacts path" do
      expect(Gitlab.config.artifacts).to receive(:path)

      subject
    end
  end

  describe '.artifacts_upload_path' do
    subject { described_class.artifacts_upload_path }

    it 'starts with the artifacts path' do
      expect(subject).to start_with(path)
    end

    it 'ends with the uploads directory' do
      expect(subject).to end_with('tmp/uploads/')
    end
  end

  describe '#store_dir' do
    subject { uploader.store_dir }

    it 'starts with the artifacts path' do
      expect(subject).to start_with(path)
    end

    it 'ends with the project id and job id' do
      expect(subject).to end_with("#{job.project_id}/#{job.id}")
    end
  end

  describe '#cache_dir' do
    subject { uploader.cache_dir }

    it 'starts with the artifacts path' do
      expect(subject).to start_with(path)
    end

    it 'ends with the cache directory' do
      expect(subject).to end_with('tmp/cache')
    end
  end
end
require 'rails_helper'
require 'carrierwave/storage/fog'
# Specs for the storage predicates and move flags of GitlabUploader,
# exercised through an anonymous subclass.
describe GitlabUploader do
  let(:uploader_class) { Class.new(described_class) }

  subject { uploader_class.new }

  describe '#file_storage?' do
    context 'when file storage is used' do
      before do
        uploader_class.storage(:file)
      end

      it 'is file storage' do
        expect(subject).to be_file_storage
      end
    end

    context 'when is remote storage' do
      before do
        uploader_class.storage(:fog)
      end

      it 'is not file storage' do
        expect(subject).not_to be_file_storage
      end
    end
  end

  describe '#file_cache_storage?' do
    context 'when file storage is used' do
      before do
        uploader_class.cache_storage(:file)
      end

      it 'is file cache storage' do
        expect(subject).to be_file_cache_storage
      end
    end

    context 'when is remote storage' do
      before do
        uploader_class.cache_storage(:fog)
      end

      it 'is not file cache storage' do
        expect(subject).not_to be_file_cache_storage
      end
    end
  end

  describe '#move_to_cache' do
    it 'is true' do
      moved = subject.move_to_cache

      expect(moved).to eq(true)
    end
  end

  describe '#move_to_store' do
    it 'is true' do
      moved = subject.move_to_store

      expect(moved).to eq(true)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment