Commit d1098afd authored by Vladimir Shushlin's avatar Vladimir Shushlin Committed by Kamil Trzciński

Fix TypeError in ::Pages::ZipDirectoryService

https://sentry.gitlab.net/gitlab/gitlabcom/issues/2479537/

nil can be passed if parent directory doesn't exist
parent 312d5560
...@@ -2,10 +2,11 @@ ...@@ -2,10 +2,11 @@
module Pages module Pages
class MigrateFromLegacyStorageService class MigrateFromLegacyStorageService
def initialize(logger, migration_threads, batch_size) def initialize(logger, migration_threads:, batch_size:, ignore_invalid_entries:)
@logger = logger @logger = logger
@migration_threads = migration_threads @migration_threads = migration_threads
@batch_size = batch_size @batch_size = batch_size
@ignore_invalid_entries = ignore_invalid_entries
@migrated = 0 @migrated = 0
@errored = 0 @errored = 0
...@@ -59,19 +60,19 @@ module Pages ...@@ -59,19 +60,19 @@ module Pages
def migrate_project(project) def migrate_project(project)
result = nil result = nil
time = Benchmark.realtime do time = Benchmark.realtime do
result = ::Pages::MigrateLegacyStorageToDeploymentService.new(project).execute result = ::Pages::MigrateLegacyStorageToDeploymentService.new(project, ignore_invalid_entries: @ignore_invalid_entries).execute
end end
if result[:status] == :success if result[:status] == :success
@logger.info("project_id: #{project.id} #{project.pages_path} has been migrated in #{time} seconds") @logger.info("project_id: #{project.id} #{project.pages_path} has been migrated in #{time.round(2)} seconds")
@counters_lock.synchronize { @migrated += 1 } @counters_lock.synchronize { @migrated += 1 }
else else
@logger.error("project_id: #{project.id} #{project.pages_path} failed to be migrated in #{time} seconds: #{result[:message]}") @logger.error("project_id: #{project.id} #{project.pages_path} failed to be migrated in #{time.round(2)} seconds: #{result[:message]}")
@counters_lock.synchronize { @errored += 1 } @counters_lock.synchronize { @errored += 1 }
end end
rescue => e rescue => e
@counters_lock.synchronize { @errored += 1 } @counters_lock.synchronize { @errored += 1 }
@logger.error("#{e.message} project_id: #{project&.id}") @logger.error("project_id: #{project&.id} #{project&.pages_path} failed to be migrated: #{e.message}")
Gitlab::ErrorTracking.track_exception(e, project_id: project&.id) Gitlab::ErrorTracking.track_exception(e, project_id: project&.id)
end end
end end
......
...@@ -9,8 +9,9 @@ module Pages ...@@ -9,8 +9,9 @@ module Pages
attr_reader :project attr_reader :project
def initialize(project) def initialize(project, ignore_invalid_entries: false)
@project = project @project = project
@ignore_invalid_entries = ignore_invalid_entries
end end
def execute def execute
...@@ -26,7 +27,7 @@ module Pages ...@@ -26,7 +27,7 @@ module Pages
private private
def execute_unsafe def execute_unsafe
zip_result = ::Pages::ZipDirectoryService.new(project.pages_path).execute zip_result = ::Pages::ZipDirectoryService.new(project.pages_path, ignore_invalid_entries: @ignore_invalid_entries).execute
if zip_result[:status] == :error if zip_result[:status] == :error
if !project.pages_metadatum&.reload&.pages_deployment && if !project.pages_metadatum&.reload&.pages_deployment &&
......
...@@ -10,12 +10,17 @@ module Pages ...@@ -10,12 +10,17 @@ module Pages
PUBLIC_DIR = 'public' PUBLIC_DIR = 'public'
def initialize(input_dir) attr_reader :public_dir, :real_dir
def initialize(input_dir, ignore_invalid_entries: false)
@input_dir = input_dir @input_dir = input_dir
@ignore_invalid_entries = ignore_invalid_entries
end end
def execute def execute
return error("Can not find valid public dir in #{@input_dir}") unless valid_path?(public_dir) unless resolve_public_dir
return error("Can not find valid public dir in #{@input_dir}")
end
output_file = File.join(real_dir, "@migrated.zip") # '@' to avoid any name collision with groups or projects output_file = File.join(real_dir, "@migrated.zip") # '@' to avoid any name collision with groups or projects
...@@ -35,24 +40,36 @@ module Pages ...@@ -35,24 +40,36 @@ module Pages
private private
def resolve_public_dir
@real_dir = File.realpath(@input_dir)
@public_dir = File.join(real_dir, PUBLIC_DIR)
valid_path?(public_dir)
rescue Errno::ENOENT
false
end
def write_entry(zipfile, zipfile_path) def write_entry(zipfile, zipfile_path)
disk_file_path = File.join(real_dir, zipfile_path) disk_file_path = File.join(real_dir, zipfile_path)
unless valid_path?(disk_file_path) unless valid_path?(disk_file_path)
# archive with invalid entry will just have this entry missing # archive with invalid entry will just have this entry missing
raise InvalidEntryError raise InvalidEntryError, "#{disk_file_path} is invalid, input_dir: #{@input_dir}"
end end
case File.lstat(disk_file_path).ftype ftype = File.lstat(disk_file_path).ftype
case ftype
when 'directory' when 'directory'
recursively_zip_directory(zipfile, disk_file_path, zipfile_path) recursively_zip_directory(zipfile, disk_file_path, zipfile_path)
when 'file', 'link' when 'file', 'link'
zipfile.add(zipfile_path, disk_file_path) zipfile.add(zipfile_path, disk_file_path)
else else
raise InvalidEntryError raise InvalidEntryError, "#{disk_file_path} has invalid ftype: #{ftype}, input_dir: #{@input_dir}"
end end
rescue InvalidEntryError => e rescue Errno::ENOENT, Errno::ELOOP, InvalidEntryError => e
Gitlab::ErrorTracking.track_exception(e, input_dir: @input_dir, disk_file_path: disk_file_path) Gitlab::ErrorTracking.track_exception(e, input_dir: @input_dir, disk_file_path: disk_file_path)
raise e unless @ignore_invalid_entries
end end
def recursively_zip_directory(zipfile, disk_file_path, zipfile_path) def recursively_zip_directory(zipfile, disk_file_path, zipfile_path)
...@@ -70,31 +87,11 @@ module Pages ...@@ -70,31 +87,11 @@ module Pages
end end
end end
# that should never happen, but we want to be safer # SafeZip was introduced only recently,
# in theory without this we would allow to use symlinks # so we have invalid entries on disk
# to pack any directory on disk
# it isn't possible because SafeZip doesn't extract such archives
def valid_path?(disk_file_path) def valid_path?(disk_file_path)
realpath = File.realpath(disk_file_path) realpath = File.realpath(disk_file_path)
realpath == public_dir || realpath.start_with?(public_dir + "/") realpath == public_dir || realpath.start_with?(public_dir + "/")
# happens if target of symlink isn't there
rescue => e
Gitlab::ErrorTracking.track_exception(e, input_dir: real_dir, disk_file_path: disk_file_path)
false
end
def real_dir
strong_memoize(:real_dir) do
File.realpath(@input_dir) rescue nil
end
end
def public_dir
strong_memoize(:public_dir) do
File.join(real_dir, PUBLIC_DIR) rescue nil
end
end end
end end
end end
...@@ -8,7 +8,10 @@ namespace :gitlab do ...@@ -8,7 +8,10 @@ namespace :gitlab do
task migrate_legacy_storage: :gitlab_environment do task migrate_legacy_storage: :gitlab_environment do
logger.info('Starting to migrate legacy pages storage to zip deployments') logger.info('Starting to migrate legacy pages storage to zip deployments')
result = ::Pages::MigrateFromLegacyStorageService.new(logger, migration_threads, batch_size).execute result = ::Pages::MigrateFromLegacyStorageService.new(logger,
migration_threads: migration_threads,
batch_size: batch_size,
ignore_invalid_entries: ignore_invalid_entries).execute
logger.info("A total of #{result[:migrated] + result[:errored]} projects were processed.") logger.info("A total of #{result[:migrated] + result[:errored]} projects were processed.")
logger.info("- The #{result[:migrated]} projects migrated successfully") logger.info("- The #{result[:migrated]} projects migrated successfully")
...@@ -42,5 +45,11 @@ namespace :gitlab do ...@@ -42,5 +45,11 @@ namespace :gitlab do
def batch_size def batch_size
ENV.fetch('PAGES_MIGRATION_BATCH_SIZE', '10').to_i ENV.fetch('PAGES_MIGRATION_BATCH_SIZE', '10').to_i
end end
def ignore_invalid_entries
Gitlab::Utils.to_boolean(
ENV.fetch('PAGES_MIGRATION_IGNORE_INVALID_ENTRIES', 'false')
)
end
end end
end end
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Pages::MigrateFromLegacyStorageService do RSpec.describe Pages::MigrateFromLegacyStorageService do
let(:service) { described_class.new(Rails.logger, 3, 10) } let(:service) { described_class.new(Rails.logger, migration_threads: 3, batch_size: 10, ignore_invalid_entries: false) }
it 'does not try to migrate pages if pages are not deployed' do it 'does not try to migrate pages if pages are not deployed' do
expect(::Pages::MigrateLegacyStorageToDeploymentService).not_to receive(:new) expect(::Pages::MigrateLegacyStorageToDeploymentService).not_to receive(:new)
...@@ -22,7 +22,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do ...@@ -22,7 +22,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
end end
end end
service = described_class.new(Rails.logger, 3, 2) service = described_class.new(Rails.logger, migration_threads: 3, batch_size: 2, ignore_invalid_entries: false)
threads = Concurrent::Set.new threads = Concurrent::Set.new
...@@ -49,7 +49,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do ...@@ -49,7 +49,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
context 'when pages directory does not exist' do context 'when pages directory does not exist' do
it 'tries to migrate the project, but does not crash' do it 'tries to migrate the project, but does not crash' do
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project) do |service| expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project, ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original expect(service).to receive(:execute).and_call_original
end end
...@@ -66,7 +66,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do ...@@ -66,7 +66,7 @@ RSpec.describe Pages::MigrateFromLegacyStorageService do
end end
it 'migrates pages projects without deployments' do it 'migrates pages projects without deployments' do
expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project) do |service| expect_next_instance_of(::Pages::MigrateLegacyStorageToDeploymentService, project, ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original expect(service).to receive(:execute).and_call_original
end end
......
...@@ -6,6 +6,14 @@ RSpec.describe Pages::MigrateLegacyStorageToDeploymentService do ...@@ -6,6 +6,14 @@ RSpec.describe Pages::MigrateLegacyStorageToDeploymentService do
let(:project) { create(:project, :repository) } let(:project) { create(:project, :repository) }
let(:service) { described_class.new(project) } let(:service) { described_class.new(project) }
it 'calls ::Pages::ZipDirectoryService' do
expect_next_instance_of(::Pages::ZipDirectoryService, project.pages_path, ignore_invalid_entries: true) do |zip_service|
expect(zip_service).to receive(:execute).and_call_original
end
expect(described_class.new(project, ignore_invalid_entries: true).execute[:status]).to eq(:error)
end
it 'marks pages as not deployed if public directory is absent' do it 'marks pages as not deployed if public directory is absent' do
project.mark_pages_as_deployed project.mark_pages_as_deployed
......
...@@ -10,8 +10,14 @@ RSpec.describe Pages::ZipDirectoryService do ...@@ -10,8 +10,14 @@ RSpec.describe Pages::ZipDirectoryService do
end end
end end
let(:ignore_invalid_entries) { false }
let(:service) do
described_class.new(@work_dir, ignore_invalid_entries: ignore_invalid_entries)
end
let(:result) do let(:result) do
described_class.new(@work_dir).execute service.execute
end end
let(:status) { result[:status] } let(:status) { result[:status] }
...@@ -20,6 +26,8 @@ RSpec.describe Pages::ZipDirectoryService do ...@@ -20,6 +26,8 @@ RSpec.describe Pages::ZipDirectoryService do
let(:entries_count) { result[:entries_count] } let(:entries_count) { result[:entries_count] }
it 'returns error if project pages dir does not exist' do it 'returns error if project pages dir does not exist' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
expect( expect(
described_class.new("/tmp/not/existing/dir").execute described_class.new("/tmp/not/existing/dir").execute
).to eq(status: :error, message: "Can not find valid public dir in /tmp/not/existing/dir") ).to eq(status: :error, message: "Can not find valid public dir in /tmp/not/existing/dir")
...@@ -132,32 +140,69 @@ RSpec.describe Pages::ZipDirectoryService do ...@@ -132,32 +140,69 @@ RSpec.describe Pages::ZipDirectoryService do
end end
end end
it 'ignores the symlink pointing outside of public directory' do shared_examples "raises or ignores file" do |raised_exception, file|
create_file("target.html", "hello") it 'raises error' do
create_link("public/link.html", "../target.html") expect do
result
end.to raise_error(raised_exception)
end
with_zip_file do |zip_file| context 'when errors are ignored' do
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT) let(:ignore_invalid_entries) { true }
it 'does not create entry' do
with_zip_file do |zip_file|
expect { zip_file.get_entry(file) }.to raise_error(Errno::ENOENT)
end
end
end end
end end
it 'ignores the symlink if target is absent' do context 'when symlink points outside of public directory' do
create_link("public/link.html", "./target.html") before do
create_file("target.html", "hello")
create_link("public/link.html", "../target.html")
end
with_zip_file do |zip_file| include_examples "raises or ignores file", described_class::InvalidEntryError, "public/link.html"
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT) end
context 'when target of the symlink is absent' do
before do
create_link("public/link.html", "./target.html")
end end
include_examples "raises or ignores file", Errno::ENOENT, "public/link.html"
end end
it 'ignores symlink if is absolute and points to outside of directory' do context 'when targets itself' do
target = File.join(@work_dir, "target") before do
FileUtils.touch(target) create_link("public/link.html", "./link.html")
end
create_link("public/link.html", target) include_examples "raises or ignores file", Errno::ELOOP, "public/link.html"
end
with_zip_file do |zip_file| context 'when symlink is absolute and points to outside of directory' do
expect { zip_file.get_entry("public/link.html") }.to raise_error(Errno::ENOENT) before do
target = File.join(@work_dir, "target")
FileUtils.touch(target)
create_link("public/link.html", target)
end end
include_examples "raises or ignores file", described_class::InvalidEntryError, "public/link.html"
end
context 'when entry has unknown ftype' do
before do
file = create_file("public/index.html", "hello")
allow(File).to receive(:lstat).and_call_original
expect(File).to receive(:lstat).with(file) { double("lstat", ftype: "unknown") }
end
include_examples "raises or ignores file", described_class::InvalidEntryError, "public/index.html"
end end
it "includes raw symlink if it's target is a valid directory" do it "includes raw symlink if it's target is a valid directory" do
...@@ -204,9 +249,13 @@ RSpec.describe Pages::ZipDirectoryService do ...@@ -204,9 +249,13 @@ RSpec.describe Pages::ZipDirectoryService do
end end
def create_file(name, content) def create_file(name, content)
File.open(File.join(@work_dir, name), "w") do |f| file_path = File.join(@work_dir, name)
File.open(file_path, "w") do |f|
f.write(content) f.write(content)
end end
file_path
end end
def create_dir(dir) def create_dir(dir)
......
...@@ -11,7 +11,10 @@ RSpec.describe 'gitlab:pages' do ...@@ -11,7 +11,10 @@ RSpec.describe 'gitlab:pages' do
subject { run_rake_task('gitlab:pages:migrate_legacy_storage') } subject { run_rake_task('gitlab:pages:migrate_legacy_storage') }
it 'calls migration service' do it 'calls migration service' do
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 3, 10) do |service| expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 10,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original expect(service).to receive(:execute).and_call_original
end end
...@@ -21,7 +24,10 @@ RSpec.describe 'gitlab:pages' do ...@@ -21,7 +24,10 @@ RSpec.describe 'gitlab:pages' do
it 'uses PAGES_MIGRATION_THREADS environment variable' do it 'uses PAGES_MIGRATION_THREADS environment variable' do
stub_env('PAGES_MIGRATION_THREADS', '5') stub_env('PAGES_MIGRATION_THREADS', '5')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 5, 10) do |service| expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 5,
batch_size: 10,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original expect(service).to receive(:execute).and_call_original
end end
...@@ -31,7 +37,23 @@ RSpec.describe 'gitlab:pages' do ...@@ -31,7 +37,23 @@ RSpec.describe 'gitlab:pages' do
it 'uses PAGES_MIGRATION_BATCH_SIZE environment variable' do it 'uses PAGES_MIGRATION_BATCH_SIZE environment variable' do
stub_env('PAGES_MIGRATION_BATCH_SIZE', '100') stub_env('PAGES_MIGRATION_BATCH_SIZE', '100')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything, 3, 100) do |service| expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 100,
ignore_invalid_entries: false) do |service|
expect(service).to receive(:execute).and_call_original
end
subject
end
it 'uses PAGES_MIGRATION_IGNORE_INVALID_ENTRIES environment variable' do
stub_env('PAGES_MIGRATION_IGNORE_INVALID_ENTRIES', 'true')
expect_next_instance_of(::Pages::MigrateFromLegacyStorageService, anything,
migration_threads: 3,
batch_size: 10,
ignore_invalid_entries: true) do |service|
expect(service).to receive(:execute).and_call_original expect(service).to receive(:execute).and_call_original
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment