Commit d1098afd authored by Vladimir Shushlin's avatar Vladimir Shushlin Committed by Kamil Trzciński

Fix TypeError in ::Pages::ZipDirectoryService

https://sentry.gitlab.net/gitlab/gitlabcom/issues/2479537/

nil can be passed if parent directory doesn't exist
parent 312d5560
......@@ -2,10 +2,11 @@
module Pages
class MigrateFromLegacyStorageService
def initialize(logger, migration_threads, batch_size)
def initialize(logger, migration_threads:, batch_size:, ignore_invalid_entries:)
@logger = logger
@migration_threads = migration_threads
@batch_size = batch_size
@ignore_invalid_entries = ignore_invalid_entries
@migrated = 0
@errored = 0
......@@ -59,19 +60,19 @@ module Pages
def migrate_project(project)
result = nil
time = Benchmark.realtime do
result = ::Pages::MigrateLegacyStorageToDeploymentService.new(project).execute
result = ::Pages::MigrateLegacyStorageToDeploymentService.new(project, ignore_invalid_entries: @ignore_invalid_entries).execute
end
if result[:status] == :success
@logger.info("project_id: #{project.id} #{project.pages_path} has been migrated in #{time} seconds")
@logger.info("project_id: #{project.id} #{project.pages_path} has been migrated in #{time.round(2)} seconds")
@counters_lock.synchronize { @migrated += 1 }
else
@logger.error("project_id: #{project.id} #{project.pages_path} failed to be migrated in #{time} seconds: #{result[:message]}")
@logger.error("project_id: #{project.id} #{project.pages_path} failed to be migrated in #{time.round(2)} seconds: #{result[:message]}")
@counters_lock.synchronize { @errored += 1 }
end
rescue => e
@counters_lock.synchronize { @errored += 1 }
@logger.error("#{e.message} project_id: #{project&.id}")
@logger.error("project_id: #{project&.id} #{project&.pages_path} failed to be migrated: #{e.message}")
Gitlab::ErrorTracking.track_exception(e, project_id: project&.id)
end
end
......
......@@ -9,8 +9,9 @@ module Pages
attr_reader :project
def initialize(project)
def initialize(project, ignore_invalid_entries: false)
@project = project
@ignore_invalid_entries = ignore_invalid_entries
end
def execute
......@@ -26,7 +27,7 @@ module Pages
private
def execute_unsafe
zip_result = ::Pages::ZipDirectoryService.new(project.pages_path).execute
zip_result = ::Pages::ZipDirectoryService.new(project.pages_path, ignore_invalid_entries: @ignore_invalid_entries).execute
if zip_result[:status] == :error
if !project.pages_metadatum&.reload&.pages_deployment &&
......
......@@ -10,12 +10,17 @@ module Pages
PUBLIC_DIR = 'public'
def initialize(input_dir)
attr_reader :public_dir, :real_dir
def initialize(input_dir, ignore_invalid_entries: false)
@input_dir = input_dir
@ignore_invalid_entries = ignore_invalid_entries
end
def execute
return error("Can not find valid public dir in #{@input_dir}") unless valid_path?(public_dir)
unless resolve_public_dir
return error("Can not find valid public dir in #{@input_dir}")
end
output_file = File.join(real_dir, "@migrated.zip") # '@' to avoid any name collision with groups or projects
......@@ -35,24 +40,36 @@ module Pages
private
def resolve_public_dir
@real_dir = File.realpath(@input_dir)
@public_dir = File.join(real_dir, PUBLIC_DIR)
valid_path?(public_dir)
rescue Errno::ENOENT
false
end
def write_entry(zipfile, zipfile_path)
disk_file_path = File.join(real_dir, zipfile_path)
unless valid_path?(disk_file_path)
# archive with invalid entry will just have this entry missing
raise InvalidEntryError
raise InvalidEntryError, "#{disk_file_path} is invalid, input_dir: #{@input_dir}"
end
case File.lstat(disk_file_path).ftype
ftype = File.lstat(disk_file_path).ftype
case ftype
when 'directory'
recursively_zip_directory(zipfile, disk_file_path, zipfile_path)
when 'file', 'link'
zipfile.add(zipfile_path, disk_file_path)
else
raise InvalidEntryError
raise InvalidEntryError, "#{disk_file_path} has invalid ftype: #{ftype}, input_dir: #{@input_dir}"
end
rescue InvalidEntryError => e
rescue Errno::ENOENT, Errno::ELOOP, InvalidEntryError => e
Gitlab::ErrorTracking.track_exception(e, input_dir: @input_dir, disk_file_path: disk_file_path)
raise e unless @ignore_invalid_entries
end
def recursively_zip_directory(zipfile, disk_file_path, zipfile_path)
......@@ -70,31 +87,11 @@ module Pages
end
end
# that should never happen, but we want to be safer
# in theory without this we would allow to use symlinks
# to pack any directory on disk
# it isn't possible because SafeZip doesn't extract such archives
# SafeZip was introduced only recently,
# so we have invalid entries on disk
def valid_path?(disk_file_path)
realpath = File.realpath(disk_file_path)
realpath == public_dir || realpath.start_with?(public_dir + "/")
# happens if target of symlink isn't there
rescue => e
Gitlab::ErrorTracking.track_exception(e, input_dir: real_dir, disk_file_path: disk_file_path)
false
end
def real_dir
strong_memoize(:real_dir) do
File.realpath(@input_dir) rescue nil
end
end
def public_dir
strong_memoize(:public_dir) do
File.join(real_dir, PUBLIC_DIR) rescue nil
end
end
end
end
......@@ -8,7 +8,10 @@ namespace :gitlab do
task migrate_legacy_storage: :gitlab_environment do
logger.info('Starting to migrate legacy pages storage to zip deployments')
result = ::Pages::MigrateFromLegacyStorageService.new(logger, migration_threads, batch_size).execute
result = ::Pages::MigrateFromLegacyStorageService.new(logger,
migration_threads: migration_threads,
batch_size: batch_size,
ignore_invalid_entries: ignore_invalid_entries).execute
logger.info("A total of #{result[:migrated] + result[:errored]} projects were processed.")
logger.info("- The #{result[:migrated]} projects migrated successfully")
......@@ -42,5 +45,11 @@ namespace :gitlab do
def batch_size
ENV.fetch('PAGES_MIGRATION_BATCH_SIZE', '10').to_i
end
def ignore_invalid_entries
Gitlab::Utils.to_boolean(
ENV.fetch('PAGES_MIGRATION_IGNORE_INVALID_ENTRIES', 'false')
)
end
end
end
......@@ -3,7 +3,7 @@
require 'spec_helper'
RSpec.describe Pages::MigrateFromLegacyStorageService do
let(:service) { described_class.new(Rails.logger, 3, 10) }
let(:service) { described_class.new(Rails.logger, migration_threads: 3, batch_size: 10, ignore_invalid_entries: false) }
it 'does not try to migrate pages if pages are not deployed' do
expect(::Pages::MigrateLegacyStorageToDeploymentService).not_to receive(:new)
......@@ -22,7 +22,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
end
end
service = described_class.new(Rails.logger, 3, 2)
service = described_class.new(Rails.logger, migration_threads: 3, batch_size: 2, ignore_invalid_entries: false)
threads = Concurrent::Set.new
......@@ -49,7 +49,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
context 'when pages directory does not exist' do
it 'tries to migrate the project, but does not crash' do
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project) do |service|
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project, ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
......@@ -66,7 +66,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
end
it 'migrates pages projects without deployments' do
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project) do |service|
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project, ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
......
......@@ -6,6 +6,14 @@ RSpec.describe Pages::MigrateLegacyStorageToDeploymentService do
let(:project) { create(:project, :repository) }
let(:service) { described_class.new(project) }
it 'calls ::Pages::ZipDirectoryService' do
expect_next_instance_of(::Pages::ZipDirectoryService, project.pages_path, ignore_invalid_entries: true) do |zip_service|
expect(zip_service).to receive(:execute).and_call_original
end
expect(described_class.new(project, ignore_invalid_entries: true).execute[:status]).to eq(:error)
end
it 'marks pages as not deployed if public directory is absent' do
project.mark_pages_as_deployed
......
......@@ -10,8 +10,14 @@ RSpec.describe Pages::ZipDirectoryService do
end
end
let(:ignore_invalid_entries) { false }
let(:service) do
described_class.new(@work_dir, ignore_invalid_entries: ignore_invalid_entries)
end
let(:result) do
described_class.new(@work_dir).execute
service.execute
end
let(:status) { result[:status] }
......@@ -20,6 +26,8 @@ RSpec.describe Pages::ZipDirectoryService do
let(:entries_count) { result[:entries_count] }
it 'returns error if project pages dir does not exist' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
expect(
described_class.new("/tmp/not/existing/dir").execute
).to eq(status: :error, message: "Can not find valid public dir in /tmp/not/existing/dir")
......@@ -132,32 +140,69 @@ RSpec.describe Pages::ZipDirectoryService do
end
end
it 'ignores the symlink pointing outside of public directory' do
create_file("target.html", "hello")
create_link("public/link.html", "../target.html")
shared_examples "raises or ignores file" do |raised_exception, file|
it 'raises error' do
expect do
result
end.to raise_error(raised_exception)
end
context 'when errors are ignored' do
let(:ignore_invalid_entries) { true }
it 'does not create entry' do
with_zip_file do |zip_file|
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT)
expect { zip_file.get_entry(file) }.to raise_error(Errno::ENOENT)
end
end
end
end
context 'when symlink points outside of public directory' do
before do
create_file("target.html", "hello")
create_link("public/link.html", "../target.html")
end
include_examples "raises or ignores file", described_class::InvalidEntryError, "public/link.html"
end
it 'ignores the symlink if target is absent' do
context 'when target of the symlink is absent' do
before do
create_link("public/link.html", "./target.html")
end
with_zip_file do |zip_file|
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT)
include_examples "raises or ignores file", Errno::ENOENT, "public/link.html"
end
context 'when targets itself' do
before do
create_link("public/link.html", "./link.html")
end
it 'ignores symlink if is absolute and points to outside of directory' do
include_examples "raises or ignores file", Errno::ELOOP, "public/link.html"
end
context 'when symlink is absolute and points to outside of directory' do
before do
target = File.join(@work_dir, "target")
FileUtils.touch(target)
create_link("public/link.html", target)
end
with_zip_file do |zip_file|
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT)
include_examples "raises or ignores file", described_class::InvalidEntryError, "public/link.html"
end
context 'when entry has unknown ftype' do
before do
file = create_file("public/index.html", "hello")
allow(File).to receive(:lstat).and_call_original
expect(File).to receive(:lstat).with(file) { double("lstat", ftype: "unknown") }
end
include_examples "raises or ignores file", described_class::InvalidEntryError, "public/index.html"
end
it "includes raw symlink if it's target is a valid directory" do
......@@ -204,9 +249,13 @@ RSpec.describe Pages::ZipDirectoryService do
end
def create_file(name, content)
File.open(File.join(@work_dir, name), "w") do |f|
file_path = File.join(@work_dir, name)
File.open(file_path, "w") do |f|
f.write(content)
end
file_path
end
def create_dir(dir)
......
......@@ -11,7 +11,10 @@ RSpec.describe 'gitlab:pages' do
subject { run_rake_task('gitlab:pages:migrate_legacy_storage') }
it 'calls migration service' do
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 3, 10) do |service|
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 10,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
......@@ -21,7 +24,10 @@ RSpec.describe 'gitlab:pages' do
it 'uses PAGES_MIGRATION_THREADS environment variable' do
stub_env('PAGES_MIGRATION_THREADS', '5')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 5, 10) do |service|
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 5,
batch_size: 10,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
......@@ -31,7 +37,23 @@ RSpec.describe 'gitlab:pages' do
it 'uses PAGES_MIGRATION_BATCH_SIZE environment variable' do
stub_env('PAGES_MIGRATION_BATCH_SIZE', '100')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 3, 100) do |service|
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 100,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
subject
end
it 'uses PAGES_MIGRATION_IGNORE_INVALID_ENTRIES environment variable' do
stub_env('PAGES_MIGRATION_IGNORE_INVALID_ENTRIES', 'true')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 10,
ignore_invalid_entries: true) do |service|
expect(service).to receive(:execute).and_call_original
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment