Commit 66744469 authored by Kamil Trzciński's avatar Kamil Trzciński Committed by Yorick Peterse

Extract GitLab Pages using RubyZip

RubyZip allows us to perform strong validation of
expanded paths where we do extract file.

We introduce the following additional checks
to extract routines:

1. None of path components can be symlinked,
2. We drop privileges support for directories,
3. Symlink source needs to point within the target directory,
   like `public/`,
4. The symlink source needs to exist ahead of time.
parent a1bf0882
...@@ -57,6 +57,7 @@ gem 'u2f', '~> 0.2.1' ...@@ -57,6 +57,7 @@ gem 'u2f', '~> 0.2.1'
# GitLab Pages # GitLab Pages
gem 'validates_hostname', '~> 1.0.6' gem 'validates_hostname', '~> 1.0.6'
gem 'rubyzip', '~> 1.2.2', require: false
# Browser detection # Browser detection
gem 'browser', '~> 2.5' gem 'browser', '~> 2.5'
......
...@@ -1138,6 +1138,7 @@ DEPENDENCIES ...@@ -1138,6 +1138,7 @@ DEPENDENCIES
ruby-prof (~> 0.17.0) ruby-prof (~> 0.17.0)
ruby-progressbar ruby-progressbar
ruby_parser (~> 3.8) ruby_parser (~> 3.8)
rubyzip (~> 1.2.2)
rugged (~> 0.27) rugged (~> 0.27)
sanitize (~> 4.6) sanitize (~> 4.6)
sass (~> 3.5) sass (~> 3.5)
......
...@@ -7,7 +7,11 @@ module Projects ...@@ -7,7 +7,11 @@ module Projects
BLOCK_SIZE = 32.kilobytes BLOCK_SIZE = 32.kilobytes
MAX_SIZE = 1.terabyte MAX_SIZE = 1.terabyte
SITE_PATH = 'public/'.freeze PUBLIC_DIR = 'public'.freeze
# this has to be invalid group name,
# as it shares the namespace with groups
TMP_EXTRACT_PATH = '@pages.tmp'.freeze
attr_reader :build attr_reader :build
...@@ -27,12 +31,11 @@ module Projects ...@@ -27,12 +31,11 @@ module Projects
raise InvalidStateError, 'pages are outdated' unless latest? raise InvalidStateError, 'pages are outdated' unless latest?
# Create temporary directory in which we will extract the artifacts # Create temporary directory in which we will extract the artifacts
FileUtils.mkdir_p(tmp_path) make_secure_tmp_dir(tmp_path) do |archive_path|
Dir.mktmpdir(nil, tmp_path) do |archive_path|
extract_archive!(archive_path) extract_archive!(archive_path)
# Check if we did extract public directory # Check if we did extract public directory
archive_public_path = File.join(archive_path, 'public') archive_public_path = File.join(archive_path, PUBLIC_DIR)
raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path) raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path)
raise InvalidStateError, 'pages are outdated' unless latest? raise InvalidStateError, 'pages are outdated' unless latest?
...@@ -85,22 +88,18 @@ module Projects ...@@ -85,22 +88,18 @@ module Projects
raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata? raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata?
# Calculate page size after extract # Calculate page size after extract
public_entry = build.artifacts_metadata_entry(SITE_PATH, recursive: true) public_entry = build.artifacts_metadata_entry(PUBLIC_DIR + '/', recursive: true)
if public_entry.total_size > max_size if public_entry.total_size > max_size
raise InvalidStateError, "artifacts for pages are too large: #{public_entry.total_size}" raise InvalidStateError, "artifacts for pages are too large: #{public_entry.total_size}"
end end
# Requires UnZip at least 6.00 Info-ZIP.
# -qq be (very) quiet
# -n never overwrite existing files
# We add * to end of SITE_PATH, because we want to extract SITE_PATH and all subdirectories
site_path = File.join(SITE_PATH, '*')
build.artifacts_file.use_file do |artifacts_path| build.artifacts_file.use_file do |artifacts_path|
unless system(*%W(unzip -n #{artifacts_path} #{site_path} -d #{temp_path})) SafeZip::Extract.new(artifacts_path)
raise FailedToExtractError, 'pages failed to extract' .extract(directories: [PUBLIC_DIR], to: temp_path)
end
end end
rescue SafeZip::Extract::Error => e
raise FailedToExtractError, e.message
end end
def deploy_page!(archive_public_path) def deploy_page!(archive_public_path)
...@@ -139,7 +138,7 @@ module Projects ...@@ -139,7 +138,7 @@ module Projects
end end
def tmp_path def tmp_path
@tmp_path ||= File.join(::Settings.pages.path, 'tmp') @tmp_path ||= File.join(::Settings.pages.path, TMP_EXTRACT_PATH)
end end
def pages_path def pages_path
...@@ -147,11 +146,11 @@ module Projects ...@@ -147,11 +146,11 @@ module Projects
end end
def public_path def public_path
@public_path ||= File.join(pages_path, 'public') @public_path ||= File.join(pages_path, PUBLIC_DIR)
end end
def previous_public_path def previous_public_path
@previous_public_path ||= File.join(pages_path, "public.#{SecureRandom.hex}") @previous_public_path ||= File.join(pages_path, "#{PUBLIC_DIR}.#{SecureRandom.hex}")
end end
def ref def ref
...@@ -188,5 +187,15 @@ module Projects ...@@ -188,5 +187,15 @@ module Projects
def pages_deployments_failed_total_counter def pages_deployments_failed_total_counter
@pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed") @pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed")
end end
def make_secure_tmp_dir(tmp_path)
FileUtils.mkdir_p(tmp_path)
path = Dir.mktmpdir(nil, tmp_path)
begin
yield(path)
ensure
FileUtils.remove_entry_secure(path)
end
end
end end
end end
---
title: Extract GitLab Pages using RubyZip
merge_request:
author:
type: security
# frozen_string_literal: true
module SafeZip
class Entry
attr_reader :zip_archive, :zip_entry
attr_reader :path, :params
def initialize(zip_archive, zip_entry, params)
@zip_archive = zip_archive
@zip_entry = zip_entry
@params = params
@path = ::File.expand_path(zip_entry.name, params.extract_path)
end
def path_dir
::File.dirname(path)
end
def real_path_dir
::File.realpath(path_dir)
end
def exist?
::File.exist?(path)
end
def extract
# do not extract if file is not part of target directory
return false unless matching_target_directory
# do not overwrite existing file
raise SafeZip::Extract::AlreadyExistsError, "File already exists #{zip_entry.name}" if exist?
create_path_dir
if zip_entry.file?
extract_file
elsif zip_entry.directory?
extract_dir
elsif zip_entry.symlink?
extract_symlink
else
raise SafeZip::Extract::UnsupportedEntryError, "File #{zip_entry.name} cannot be extracted"
end
rescue SafeZip::Extract::Error
raise
rescue => e
raise SafeZip::Extract::ExtractError, e.message
end
private
def extract_file
zip_archive.extract(zip_entry, path)
end
def extract_dir
FileUtils.mkdir(path)
end
def extract_symlink
source_path = read_symlink
real_source_path = expand_symlink(source_path)
# ensure that source path of symlink is within target directories
unless real_source_path.start_with?(matching_target_directory)
raise SafeZip::Extract::PermissionDeniedError, "Symlink cannot be created targeting: #{source_path}"
end
::File.symlink(source_path, path)
end
def create_path_dir
# Create all directories, but ignore permissions
FileUtils.mkdir_p(path_dir)
# disallow to make path dirs to point to another directories
unless path_dir == real_path_dir
raise SafeZip::Extract::PermissionDeniedError, "Directory of #{zip_entry.name} points to another directory"
end
end
def matching_target_directory
params.matching_target_directory(path)
end
def read_symlink
zip_archive.read(zip_entry)
end
def expand_symlink(source_path)
::File.realpath(source_path, path_dir)
rescue
raise SafeZip::Extract::SymlinkSourceDoesNotExistError, "Symlink source #{source_path} does not exist"
end
end
end
# frozen_string_literal: true
module SafeZip
class Extract
Error = Class.new(StandardError)
PermissionDeniedError = Class.new(Error)
SymlinkSourceDoesNotExistError = Class.new(Error)
UnsupportedEntryError = Class.new(Error)
AlreadyExistsError = Class.new(Error)
NoMatchingError = Class.new(Error)
ExtractError = Class.new(Error)
attr_reader :archive_path
def initialize(archive_file)
@archive_path = archive_file
end
def extract(opts = {})
params = SafeZip::ExtractParams.new(**opts)
if Feature.enabled?(:safezip_use_rubyzip, default_enabled: true)
extract_with_ruby_zip(params)
else
legacy_unsafe_extract_with_system_zip(params)
end
end
private
def extract_with_ruby_zip(params)
Zip::File.open(archive_path) do |zip_archive|
# Extract all files in the following order:
# 1. Directories first,
# 2. Files next,
# 3. Symlinks last (or anything else)
extracted = extract_all_entries(zip_archive, params,
zip_archive.lazy.select(&:directory?))
extracted += extract_all_entries(zip_archive, params,
zip_archive.lazy.select(&:file?))
extracted += extract_all_entries(zip_archive, params,
zip_archive.lazy.reject(&:directory?).reject(&:file?))
raise NoMatchingError, 'No entries extracted' unless extracted > 0
end
end
def extract_all_entries(zip_archive, params, entries)
entries.count do |zip_entry|
SafeZip::Entry.new(zip_archive, zip_entry, params)
.extract
end
end
def legacy_unsafe_extract_with_system_zip(params)
# Requires UnZip at least 6.00 Info-ZIP.
# -n never overwrite existing files
args = %W(unzip -n -qq #{archive_path})
# We add * to end of directory, because we want to extract directory and all subdirectories
args += params.directories_wildcard
# Target directory where we extract
args += %W(-d #{params.extract_path})
unless system(*args)
raise Error, 'archive failed to extract'
end
end
end
end
# frozen_string_literal: true
module SafeZip
class ExtractParams
include Gitlab::Utils::StrongMemoize
attr_reader :directories, :extract_path
def initialize(directories:, to:)
@directories = directories
@extract_path = ::File.realpath(to)
end
def matching_target_directory(path)
target_directories.find do |directory|
path.start_with?(directory)
end
end
def target_directories
strong_memoize(:target_directories) do
directories.map do |directory|
::File.join(::File.expand_path(directory, extract_path), '')
end
end
end
def directories_wildcard
strong_memoize(:directories_wildcard) do
directories.map do |directory|
::File.join(directory, '*')
end
end
end
end
end
require 'spec_helper'
describe SafeZip::Entry do
let(:target_path) { Dir.mktmpdir('safe-zip') }
let(:directories) { %w(public folder/with/subfolder) }
let(:params) { SafeZip::ExtractParams.new(directories: directories, to: target_path) }
let(:entry) { described_class.new(zip_archive, zip_entry, params) }
let(:entry_name) { 'public/folder/index.html' }
let(:entry_path_dir) { File.join(target_path, File.dirname(entry_name)) }
let(:entry_path) { File.join(target_path, entry_name) }
let(:zip_archive) { double }
let(:zip_entry) do
double(
name: entry_name,
file?: false,
directory?: false,
symlink?: false)
end
after do
FileUtils.remove_entry_secure(target_path)
end
context '#path_dir' do
subject { entry.path_dir }
it { is_expected.to eq(target_path + '/public/folder') }
end
context '#exist?' do
subject { entry.exist? }
context 'when entry does not exist' do
it { is_expected.not_to be_truthy }
end
context 'when entry does exist' do
before do
create_entry
end
it { is_expected.to be_truthy }
end
end
describe '#extract' do
subject { entry.extract }
context 'when entry does not match the filtered directories' do
using RSpec::Parameterized::TableSyntax
where(:entry_name) do
[
'assets/folder/index.html',
'public/../folder/index.html',
'public/../../../../../index.html',
'../../../../../public/index.html',
'/etc/passwd'
]
end
with_them do
it 'does not extract file' do
is_expected.to be_falsey
end
end
end
context 'when entry does exist' do
before do
create_entry
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::AlreadyExistsError)
end
end
context 'when entry type is unknown' do
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::UnsupportedEntryError)
end
end
context 'when entry is valid' do
shared_examples 'secured symlinks' do
context 'when we try to extract entry into symlinked folder' do
before do
FileUtils.mkdir_p(File.join(target_path, "source"))
File.symlink("source", File.join(target_path, "public"))
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
end
context 'and is file' do
before do
allow(zip_entry).to receive(:file?) { true }
end
it 'does extract file' do
expect(zip_archive).to receive(:extract)
.with(zip_entry, entry_path)
.and_return(true)
is_expected.to be_truthy
end
it_behaves_like 'secured symlinks'
end
context 'and is directory' do
let(:entry_name) { 'public/folder/assets' }
before do
allow(zip_entry).to receive(:directory?) { true }
end
it 'does create directory' do
is_expected.to be_truthy
expect(File.exist?(entry_path)).to eq(true)
end
it_behaves_like 'secured symlinks'
end
context 'and is symlink' do
let(:entry_name) { 'public/folder/assets' }
before do
allow(zip_entry).to receive(:symlink?) { true }
allow(zip_archive).to receive(:read).with(zip_entry) { entry_symlink }
end
shared_examples 'a valid symlink' do
it 'does create symlink' do
is_expected.to be_truthy
expect(File.exist?(entry_path)).to eq(true)
end
end
context 'when source is within target' do
let(:entry_symlink) { '../images' }
context 'but does not exist' do
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::SymlinkSourceDoesNotExistError)
end
end
context 'and does exist' do
before do
FileUtils.mkdir_p(File.join(target_path, 'public', 'images'))
end
it_behaves_like 'a valid symlink'
end
end
context 'when source points outside of target' do
let(:entry_symlink) { '../../images' }
before do
FileUtils.mkdir(File.join(target_path, 'images'))
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
context 'when source points to /etc/passwd' do
let(:entry_symlink) { '/etc/passwd' }
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
end
end
end
private
def create_entry
FileUtils.mkdir_p(entry_path_dir)
FileUtils.touch(entry_path)
end
end
require 'spec_helper'
describe SafeZip::ExtractParams do
let(:target_path) { Dir.mktmpdir("safe-zip") }
let(:params) { described_class.new(directories: directories, to: target_path) }
let(:directories) { %w(public folder/with/subfolder) }
after do
FileUtils.remove_entry_secure(target_path)
end
describe '#extract_path' do
subject { params.extract_path }
it { is_expected.to eq(target_path) }
end
describe '#matching_target_directory' do
using RSpec::Parameterized::TableSyntax
subject { params.matching_target_directory(target_path + path) }
where(:path, :result) do
'/public/index.html' | '/public/'
'/non/existing/path' | nil
'/public' | nil
'/folder/with/index.html' | nil
end
with_them do
it { is_expected.to eq(result ? target_path + result : nil) }
end
end
describe '#target_directories' do
subject { params.target_directories }
it 'starts with target_path' do
is_expected.to all(start_with(target_path + '/'))
end
it 'ends with / for all paths' do
is_expected.to all(end_with('/'))
end
end
describe '#directories_wildcard' do
subject { params.directories_wildcard }
it 'adds * for all paths' do
is_expected.to all(end_with('/*'))
end
end
end
require 'spec_helper'
describe SafeZip::Extract do
let(:target_path) { Dir.mktmpdir('safe-zip') }
let(:directories) { %w(public) }
let(:object) { described_class.new(archive) }
let(:archive) { Rails.root.join('spec', 'fixtures', 'safe_zip', archive_name) }
after do
FileUtils.remove_entry_secure(target_path)
end
context '#extract' do
subject { object.extract(directories: directories, to: target_path) }
shared_examples 'extracts archive' do |param|
before do
stub_feature_flags(safezip_use_rubyzip: param)
end
it 'does extract archive' do
subject
expect(File.exist?(File.join(target_path, 'public', 'index.html'))).to eq(true)
expect(File.exist?(File.join(target_path, 'source'))).to eq(false)
end
end
shared_examples 'fails to extract archive' do |param|
before do
stub_feature_flags(safezip_use_rubyzip: param)
end
it 'does not extract archive' do
expect { subject }.to raise_error(SafeZip::Extract::Error)
end
end
%w(valid-simple.zip valid-symlinks-first.zip valid-non-writeable.zip).each do |name|
context "when using #{name} archive" do
let(:archive_name) { name }
context 'for RubyZip' do
it_behaves_like 'extracts archive', true
end
context 'for UnZip' do
it_behaves_like 'extracts archive', false
end
end
end
%w(invalid-symlink-does-not-exist.zip invalid-symlinks-outside.zip).each do |name|
context "when using #{name} archive" do
let(:archive_name) { name }
context 'for RubyZip' do
it_behaves_like 'fails to extract archive', true
end
context 'for UnZip (UNSAFE)' do
it_behaves_like 'extracts archive', false
end
end
end
context 'when no matching directories are found' do
let(:archive_name) { 'valid-simple.zip' }
let(:directories) { %w(non/existing) }
context 'for RubyZip' do
it_behaves_like 'fails to extract archive', true
end
context 'for UnZip' do
it_behaves_like 'fails to extract archive', false
end
end
end
end
...@@ -5,24 +5,27 @@ describe Projects::UpdatePagesService do ...@@ -5,24 +5,27 @@ describe Projects::UpdatePagesService do
set(:pipeline) { create(:ci_pipeline, project: project, sha: project.commit('HEAD').sha) } set(:pipeline) { create(:ci_pipeline, project: project, sha: project.commit('HEAD').sha) }
set(:build) { create(:ci_build, pipeline: pipeline, ref: 'HEAD') } set(:build) { create(:ci_build, pipeline: pipeline, ref: 'HEAD') }
let(:invalid_file) { fixture_file_upload('spec/fixtures/dk.png') } let(:invalid_file) { fixture_file_upload('spec/fixtures/dk.png') }
let(:extension) { 'zip' }
let(:file) { fixture_file_upload("spec/fixtures/pages.#{extension}") } let(:file) { fixture_file_upload("spec/fixtures/pages.zip") }
let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.#{extension}") } let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.zip") }
let(:metadata) do let(:metadata_filename) { "spec/fixtures/pages.zip.meta" }
filename = "spec/fixtures/pages.#{extension}.meta" let(:metadata) { fixture_file_upload(metadata_filename) if File.exist?(metadata_filename) }
fixture_file_upload(filename) if File.exist?(filename)
end
subject { described_class.new(project, build) } subject { described_class.new(project, build) }
before do before do
stub_feature_flags(safezip_use_rubyzip: true)
project.remove_pages project.remove_pages
end end
context 'legacy artifacts' do context '::TMP_EXTRACT_PATH' do
let(:extension) { 'zip' } subject { described_class::TMP_EXTRACT_PATH }
it { is_expected.not_to match(Gitlab::PathRegex.namespace_format_regex) }
end
context 'legacy artifacts' do
before do before do
build.update(legacy_artifacts_file: file) build.update(legacy_artifacts_file: file)
build.update(legacy_artifacts_metadata: metadata) build.update(legacy_artifacts_metadata: metadata)
...@@ -132,6 +135,20 @@ describe Projects::UpdatePagesService do ...@@ -132,6 +135,20 @@ describe Projects::UpdatePagesService do
end end
end end
context 'when using pages with non-writeable public' do
let(:file) { fixture_file_upload("spec/fixtures/pages_non_writeable.zip") }
context 'when using RubyZip' do
before do
stub_feature_flags(safezip_use_rubyzip: true)
end
it 'succeeds to extract' do
expect(execute).to eq(:success)
end
end
end
context 'when timeout happens by DNS error' do context 'when timeout happens by DNS error' do
before do before do
allow_any_instance_of(described_class) allow_any_instance_of(described_class)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment