Commit 15f1b494 authored by Etienne Baqué's avatar Etienne Baqué

Merge branch '291069-stop-deploying-to-the-legacy-pages-storage' into 'master'

Stop deploying to the legacy pages storage

See merge request gitlab-org/gitlab!69287
parents c8a5ead0 0882ce0e
......@@ -3,18 +3,9 @@
module Projects
class UpdatePagesService < BaseService
InvalidStateError = Class.new(StandardError)
FailedToExtractError = Class.new(StandardError)
ExclusiveLeaseTaken = Class.new(StandardError)
include ::Pages::LegacyStorageLease
BLOCK_SIZE = 32.kilobytes
PUBLIC_DIR = 'public'
# this has to be invalid group name,
# as it shares the namespace with groups
TMP_EXTRACT_PATH = '@pages.tmp'
# old deployment can be cached by pages daemon
# so we need to give pages daemon some time update cache
# 10 minutes is enough, but 30 feels safer
......@@ -42,7 +33,6 @@ module Projects
validate_max_entries!
build.artifacts_file.use_file do |artifacts_path|
deploy_to_legacy_storage(artifacts_path)
create_pages_deployment(artifacts_path, build)
success
end
......@@ -78,70 +68,6 @@ module Projects
)
end
def deploy_to_legacy_storage(artifacts_path)
# path today used by one project can later be used by another
# so we can't really scope this feature flag by project or group
return unless ::Settings.pages.local_store.enabled
return if Feature.enabled?(:skip_pages_deploy_to_legacy_storage, project, default_enabled: :yaml)
# Create temporary directory in which we will extract the artifacts
make_secure_tmp_dir(tmp_path) do |tmp_path|
extract_archive!(artifacts_path, tmp_path)
# Check if we did extract public directory
archive_public_path = File.join(tmp_path, PUBLIC_DIR)
raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path)
validate_outdated_sha!
deploy_page!(archive_public_path)
end
end
def extract_archive!(artifacts_path, temp_path)
if artifacts.ends_with?('.zip')
extract_zip_archive!(artifacts_path, temp_path)
else
raise InvalidStateError, 'unsupported artifacts format'
end
end
def extract_zip_archive!(artifacts_path, temp_path)
SafeZip::Extract.new(artifacts_path)
.extract(directories: [PUBLIC_DIR], to: temp_path)
rescue SafeZip::Extract::Error => e
raise FailedToExtractError, e.message
end
def deploy_page!(archive_public_path)
deployed = try_obtain_lease do
deploy_page_unsafe!(archive_public_path)
true
end
unless deployed
raise ExclusiveLeaseTaken, "Failed to deploy pages - other deployment is in progress"
end
end
def deploy_page_unsafe!(archive_public_path)
# Do atomic move of pages
# Move and removal may not be atomic, but they are significantly faster then extracting and removal
# 1. We move deployed public to previous public path (file removal is slow)
# 2. We move temporary public to be deployed public
# 3. We remove previous public path
FileUtils.mkdir_p(pages_path)
begin
FileUtils.move(public_path, previous_public_path)
rescue StandardError
end
FileUtils.move(archive_public_path, public_path)
ensure
FileUtils.rm_r(previous_public_path, force: true)
end
def create_pages_deployment(artifacts_path, build)
sha256 = build.job_artifacts_archive.file_sha256
......@@ -165,22 +91,6 @@ module Projects
)
end
def tmp_path
@tmp_path ||= File.join(::Settings.pages.path, TMP_EXTRACT_PATH)
end
def pages_path
@pages_path ||= project.pages_path
end
def public_path
@public_path ||= File.join(pages_path, PUBLIC_DIR)
end
def previous_public_path
@previous_public_path ||= File.join(pages_path, "#{PUBLIC_DIR}.#{SecureRandom.hex}")
end
def ref
build.ref
end
......@@ -216,20 +126,6 @@ module Projects
@pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed")
end
def make_secure_tmp_dir(tmp_path)
FileUtils.mkdir_p(tmp_path)
path = Dir.mktmpdir(tmp_dir_prefix, tmp_path)
begin
yield(path)
ensure
FileUtils.remove_entry_secure(path)
end
end
def tmp_dir_prefix
"project-#{project.id}-build-#{build.id}-"
end
def validate_state!
raise InvalidStateError, 'missing pages artifacts' unless build.artifacts?
raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata?
......
---
name: skip_pages_deploy_to_legacy_storage
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59298
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/327725
milestone: '13.11'
type: development
group: group::release
default_enabled: false
......@@ -24,7 +24,6 @@ RSpec.describe Projects::UpdatePagesService do
.and_return(metadata)
stub_licensed_features(pages_size_limit: true)
stub_feature_flags(skip_pages_deploy_to_legacy_storage: false)
end
it_behaves_like 'pages size limit is', 250.megabytes
......
......@@ -2,12 +2,16 @@
module Backup
class Pages < Backup::Files
# pages used to deploy tmp files to this path
# if some of these files are still there, we don't need them in the backup
LEGACY_PAGES_TMP_PATH = '@pages.tmp'
attr_reader :progress
def initialize(progress)
@progress = progress
super('pages', Gitlab.config.pages.path, excludes: [::Projects::UpdatePagesService::TMP_EXTRACT_PATH])
super('pages', Gitlab.config.pages.path, excludes: [LEGACY_PAGES_TMP_PATH])
end
end
end
......@@ -19,16 +19,9 @@ RSpec.describe Projects::UpdatePagesService do
subject { described_class.new(project, build) }
before do
stub_feature_flags(skip_pages_deploy_to_legacy_storage: false)
project.legacy_remove_pages
end
context '::TMP_EXTRACT_PATH' do
subject { described_class::TMP_EXTRACT_PATH }
it { is_expected.not_to match(Gitlab::PathRegex.namespace_format_regex) }
end
context 'for new artifacts' do
context "for a valid job" do
let!(:artifacts_archive) { create(:ci_job_artifact, :correct_checksum, file: file, job: build) }
......@@ -52,36 +45,6 @@ RSpec.describe Projects::UpdatePagesService do
expect(project.pages_metadatum).to be_deployed
expect(project.pages_metadatum.artifacts_archive).to eq(artifacts_archive)
expect(project.pages_deployed?).to be_truthy
# Check that all expected files are extracted
%w[index.html zero .hidden/file].each do |filename|
expect(File.exist?(File.join(project.pages_path, 'public', filename))).to be_truthy
end
end
it 'creates a temporary directory with the project and build ID' do
expect(Dir).to receive(:mktmpdir).with("project-#{project.id}-build-#{build.id}-", anything).and_call_original
subject.execute
end
it "doesn't deploy to legacy storage if it's disabled" do
allow(Settings.pages.local_store).to receive(:enabled).and_return(false)
expect(execute).to eq(:success)
expect(project.pages_deployed?).to be_truthy
expect(File.exist?(File.join(project.pages_path, 'public', 'index.html'))).to eq(false)
end
it "doesn't deploy to legacy storage if skip_pages_deploy_to_legacy_storage is enabled" do
allow(Settings.pages.local_store).to receive(:enabled).and_return(true)
stub_feature_flags(skip_pages_deploy_to_legacy_storage: true)
expect(execute).to eq(:success)
expect(project.pages_deployed?).to be_truthy
expect(File.exist?(File.join(project.pages_path, 'public', 'index.html'))).to eq(false)
end
it 'creates pages_deployment and saves it in the metadata' do
......@@ -99,16 +62,6 @@ RSpec.describe Projects::UpdatePagesService do
expect(deployment.ci_build_id).to eq(build.id)
end
it 'fails if another deployment is in progress' do
subject.try_obtain_lease do
expect do
execute
end.to raise_error("Failed to deploy pages - other deployment is in progress")
expect(GenericCommitStatus.last.description).to eq("Failed to deploy pages - other deployment is in progress")
end
end
it 'does not fail if pages_metadata is absent' do
project.pages_metadatum.destroy!
project.reload
......@@ -156,47 +109,10 @@ RSpec.describe Projects::UpdatePagesService do
expect(GenericCommitStatus.last.description).to eq("pages site contains 3 file entries, while limit is set to 2")
end
it 'removes pages after destroy' do
expect(PagesWorker).to receive(:perform_in)
expect(project.pages_deployed?).to be_falsey
expect(Dir.exist?(File.join(project.pages_path))).to be_falsey
expect(execute).to eq(:success)
expect(project.pages_metadatum).to be_deployed
expect(project.pages_deployed?).to be_truthy
expect(Dir.exist?(File.join(project.pages_path))).to be_truthy
project.destroy!
expect(Dir.exist?(File.join(project.pages_path))).to be_falsey
expect(ProjectPagesMetadatum.find_by_project_id(project)).to be_nil
end
context 'when using empty file' do
let(:file) { empty_file }
it 'fails to extract' do
expect { execute }
.to raise_error(Projects::UpdatePagesService::FailedToExtractError)
end
end
context 'when using pages with non-writeable public' do
let(:file) { fixture_file_upload("spec/fixtures/pages_non_writeable.zip") }
context 'when using RubyZip' do
it 'succeeds to extract' do
expect(execute).to eq(:success)
expect(project.pages_metadatum).to be_deployed
end
end
end
context 'when timeout happens by DNS error' do
before do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:extract_zip_archive!).and_raise(SocketError)
allow(instance).to receive(:create_pages_deployment).and_raise(SocketError)
end
end
......@@ -209,24 +125,6 @@ RSpec.describe Projects::UpdatePagesService do
end
end
context 'when failed to extract zip artifacts' do
before do
expect_next_instance_of(described_class) do |instance|
expect(instance).to receive(:extract_zip_archive!)
.and_raise(Projects::UpdatePagesService::FailedToExtractError)
end
end
it 'raises an error' do
expect { execute }
.to raise_error(Projects::UpdatePagesService::FailedToExtractError)
build.reload
expect(deploy_status).to be_failed
expect(project.pages_metadatum).not_to be_deployed
end
end
context 'when missing artifacts metadata' do
before do
expect(build).to receive(:artifacts_metadata?).and_return(false)
......@@ -338,12 +236,6 @@ RSpec.describe Projects::UpdatePagesService do
end
end
it 'fails to remove project pages when no pages is deployed' do
expect(PagesWorker).not_to receive(:perform_in)
expect(project.pages_deployed?).to be_falsey
project.destroy!
end
it 'fails if no artifacts' do
expect(execute).not_to eq(:success)
end
......@@ -384,38 +276,6 @@ RSpec.describe Projects::UpdatePagesService do
end
end
context 'when file size is spoofed' do
let(:metadata) { spy('metadata') }
include_context 'pages zip with spoofed size'
before do
file = fixture_file_upload(fake_zip_path, 'pages.zip')
metafile = fixture_file_upload('spec/fixtures/pages.zip.meta')
create(:ci_job_artifact, :archive, file: file, job: build)
create(:ci_job_artifact, :metadata, file: metafile, job: build)
allow(build).to receive(:artifacts_metadata_entry).with('public/', recursive: true)
.and_return(metadata)
allow(metadata).to receive(:total_size).and_return(100)
# to pass entries count check
root_metadata = double('root metadata')
allow(build).to receive(:artifacts_metadata_entry).with('', recursive: true)
.and_return(root_metadata)
allow(root_metadata).to receive_message_chain(:entries, :count).and_return(10)
end
it 'raises an error' do
expect do
subject.execute
end.to raise_error(Projects::UpdatePagesService::FailedToExtractError,
'Entry public/index.html should be 1B but is larger when inflated')
expect(deploy_status).to be_script_failure
end
end
context 'when retrying the job' do
let!(:older_deploy_job) do
create(:generic_commit_status, :failed, pipeline: pipeline,
......
# frozen_string_literal: true
# the idea of creating zip archive with spoofed size is borrowed from
# https://github.com/rubyzip/rubyzip/pull/403/files#diff-118213fb4baa6404a40f89e1147661ebR88
RSpec.shared_context 'pages zip with spoofed size' do
let(:real_zip_path) { Tempfile.new(['real', '.zip']).path }
let(:fake_zip_path) { Tempfile.new(['fake', '.zip']).path }
before do
full_file_name = 'public/index.html'
true_size = 500_000
fake_size = 1
::Zip::File.open(real_zip_path, ::Zip::File::CREATE) do |zf|
zf.get_output_stream(full_file_name) do |os|
os.write 'a' * true_size
end
end
compressed_size = nil
::Zip::File.open(real_zip_path) do |zf|
a_entry = zf.find_entry(full_file_name)
compressed_size = a_entry.compressed_size
end
true_size_bytes = [compressed_size, true_size, full_file_name.size].pack('LLS')
fake_size_bytes = [compressed_size, fake_size, full_file_name.size].pack('LLS')
data = File.binread(real_zip_path)
data.gsub! true_size_bytes, fake_size_bytes
File.open(fake_zip_path, 'wb') do |file|
file.write data
end
end
after do
File.delete(real_zip_path) if File.exist?(real_zip_path)
File.delete(fake_zip_path) if File.exist?(fake_zip_path)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment