Commit c2da24f8 authored by George Koltsov's avatar George Koltsov Committed by GitLab Release Tools Bot

Update BulkImports UploadsPipeline to filter & ignore file symlinks

parent e30c2a00
# frozen_string_literal: true
# Archive Extraction Service allows extraction of contents
# from `tar` archives with an additional handling (removal)
# of file symlinks.
#
# @param tmpdir [String] A path where archive is located
# and where its contents are extracted.
# Tmpdir directory must be located under `Dir.tmpdir`.
# `BulkImports::Error` is raised if any other directory path is used.
#
# @param filename [String] Name of the file to extract contents from.
#
# @example
# dir = Dir.mktmpdir
# filename = 'things.tar'
# BulkImports::ArchiveExtractionService.new(tmpdir: dir, filename: filename).execute
# Dir.glob(File.join(dir, '**', '*'))
# => ['/path/to/tmp/dir/extracted_file_1', '/path/to/tmp/dir/extracted_file_2', '/path/to/tmp/dir/extracted_file_3']
module BulkImports
class ArchiveExtractionService
include Gitlab::ImportExport::CommandLineUtil
def initialize(tmpdir:, filename:)
@tmpdir = tmpdir
@filename = filename
@filepath = File.join(@tmpdir, @filename)
end
def execute
validate_filepath
validate_tmpdir
validate_symlink
extract_archive
remove_symlinks
tmpdir
end
private
attr_reader :tmpdir, :filename, :filepath
def validate_filepath
Gitlab::Utils.check_path_traversal!(filepath)
end
def validate_tmpdir
raise(BulkImports::Error, 'Invalid target directory') unless File.expand_path(tmpdir).start_with?(Dir.tmpdir)
end
def validate_symlink
raise(BulkImports::Error, 'Invalid file') if symlink?(filepath)
end
def symlink?(filepath)
File.lstat(filepath).symlink?
end
def extract_archive
untar_xf(archive: filepath, dir: tmpdir)
end
def extracted_files
Dir.glob(File.join(tmpdir, '**', '*'))
end
def remove_symlinks
extracted_files.each do |path|
FileUtils.rm(path) if symlink?(path)
end
end
end
end
...@@ -5,16 +5,16 @@ module BulkImports ...@@ -5,16 +5,16 @@ module BulkImports
module Pipelines module Pipelines
class UploadsPipeline class UploadsPipeline
include Pipeline include Pipeline
include Gitlab::ImportExport::CommandLineUtil
FILENAME = 'uploads.tar.gz'
AVATAR_PATTERN = %r{.*\/#{BulkImports::UploadsExportService::AVATAR_PATH}\/(?<identifier>.*)}.freeze AVATAR_PATTERN = %r{.*\/#{BulkImports::UploadsExportService::AVATAR_PATH}\/(?<identifier>.*)}.freeze
AvatarLoadingError = Class.new(StandardError) AvatarLoadingError = Class.new(StandardError)
def extract(context) def extract(_context)
download_service(tmp_dir, context).execute download_service.execute
untar_zxf(archive: File.join(tmp_dir, FILENAME), dir: tmp_dir) decompression_service.execute
extraction_service.execute
upload_file_paths = Dir.glob(File.join(tmp_dir, '**', '*')) upload_file_paths = Dir.glob(File.join(tmp_dir, '**', '*'))
BulkImports::Pipeline::ExtractedData.new(data: upload_file_paths) BulkImports::Pipeline::ExtractedData.new(data: upload_file_paths)
...@@ -29,6 +29,7 @@ module BulkImports ...@@ -29,6 +29,7 @@ module BulkImports
return unless dynamic_path return unless dynamic_path
return if File.directory?(file_path) return if File.directory?(file_path)
return if File.lstat(file_path).symlink?
named_captures = dynamic_path.named_captures.symbolize_keys named_captures = dynamic_path.named_captures.symbolize_keys
...@@ -36,20 +37,40 @@ module BulkImports ...@@ -36,20 +37,40 @@ module BulkImports
end end
def after_run(_) def after_run(_)
FileUtils.remove_entry(tmp_dir) FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
end end
private private
def download_service(tmp_dir, context) def download_service
BulkImports::FileDownloadService.new( BulkImports::FileDownloadService.new(
configuration: context.configuration, configuration: context.configuration,
relative_url: context.entity.relation_download_url_path('uploads'), relative_url: context.entity.relation_download_url_path(relation),
dir: tmp_dir, dir: tmp_dir,
filename: FILENAME filename: targz_filename
) )
end end
def decompression_service
BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: targz_filename)
end
def extraction_service
BulkImports::ArchiveExtractionService.new(tmpdir: tmp_dir, filename: tar_filename)
end
def relation
BulkImports::FileTransfer::BaseConfig::UPLOADS_RELATION
end
def tar_filename
"#{relation}.tar"
end
def targz_filename
"#{tar_filename}.gz"
end
def tmp_dir def tmp_dir
@tmp_dir ||= Dir.mktmpdir('bulk_imports') @tmp_dir ||= Dir.mktmpdir('bulk_imports')
end end
......
...@@ -18,6 +18,10 @@ module Gitlab ...@@ -18,6 +18,10 @@ module Gitlab
tar_with_options(archive: archive, dir: dir, options: 'cf') tar_with_options(archive: archive, dir: dir, options: 'cf')
end end
def untar_xf(archive:, dir:)
untar_with_options(archive: archive, dir: dir, options: 'xf')
end
def gzip(dir:, filename:) def gzip(dir:, filename:)
gzip_with_options(dir: dir, filename: filename) gzip_with_options(dir: dir, filename: filename)
end end
......
...@@ -4,6 +4,12 @@ require 'spec_helper' ...@@ -4,6 +4,12 @@ require 'spec_helper'
RSpec.describe 'ActionCableSubscriptionAdapterIdentifier override' do RSpec.describe 'ActionCableSubscriptionAdapterIdentifier override' do
describe '#identifier' do describe '#identifier' do
let!(:original_config) { ::ActionCable::Server::Base.config.cable }
after do
::ActionCable::Server::Base.config.cable = original_config
end
context 'when id key is nil on cable.yml' do context 'when id key is nil on cable.yml' do
it 'does not override server config id with action cable pid' do it 'does not override server config id with action cable pid' do
config = { config = {
......
...@@ -70,8 +70,10 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do ...@@ -70,8 +70,10 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
describe '#extract' do describe '#extract' do
it 'downloads & extracts upload paths' do it 'downloads & extracts upload paths' do
allow(Dir).to receive(:mktmpdir).and_return(tmpdir) allow(Dir).to receive(:mktmpdir).and_return(tmpdir)
expect(pipeline).to receive(:untar_zxf)
file_download_service = instance_double("BulkImports::FileDownloadService") download_service = instance_double("BulkImports::FileDownloadService")
decompression_service = instance_double("BulkImports::FileDecompressionService")
extraction_service = instance_double("BulkImports::ArchiveExtractionService")
expect(BulkImports::FileDownloadService) expect(BulkImports::FileDownloadService)
.to receive(:new) .to receive(:new)
...@@ -80,9 +82,13 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do ...@@ -80,9 +82,13 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
relative_url: "/#{entity.pluralized_name}/test/export_relations/download?relation=uploads", relative_url: "/#{entity.pluralized_name}/test/export_relations/download?relation=uploads",
dir: tmpdir, dir: tmpdir,
filename: 'uploads.tar.gz') filename: 'uploads.tar.gz')
.and_return(file_download_service) .and_return(download_service)
expect(BulkImports::FileDecompressionService).to receive(:new).with(dir: tmpdir, filename: 'uploads.tar.gz').and_return(decompression_service)
expect(BulkImports::ArchiveExtractionService).to receive(:new).with(tmpdir: tmpdir, filename: 'uploads.tar').and_return(extraction_service)
expect(file_download_service).to receive(:execute) expect(download_service).to receive(:execute)
expect(decompression_service).to receive(:execute)
expect(extraction_service).to receive(:execute)
extracted_data = pipeline.extract(context) extracted_data = pipeline.extract(context)
...@@ -106,6 +112,16 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do ...@@ -106,6 +112,16 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
expect { pipeline.load(context, uploads_dir_path) }.not_to change { portable.uploads.count } expect { pipeline.load(context, uploads_dir_path) }.not_to change { portable.uploads.count }
end end
end end
context 'when path is a symlink' do
it 'does not upload the file' do
symlink = File.join(tmpdir, 'symlink')
FileUtils.ln_s(File.join(tmpdir, upload_file_path), symlink)
expect { pipeline.load(context, symlink) }.not_to change { portable.uploads.count }
end
end
end end
end end
......
...@@ -101,4 +101,39 @@ RSpec.describe Gitlab::ImportExport::CommandLineUtil do ...@@ -101,4 +101,39 @@ RSpec.describe Gitlab::ImportExport::CommandLineUtil do
end end
end end
end end
describe '#untar_xf' do
let(:archive_dir) { Dir.mktmpdir }
after do
FileUtils.remove_entry(archive_dir)
end
it 'extracts archive without decompression' do
filename = 'archive.tar.gz'
archive_file = File.join(archive_dir, 'archive.tar')
FileUtils.copy_file(archive, File.join(archive_dir, filename))
subject.gunzip(dir: archive_dir, filename: filename)
result = subject.untar_xf(archive: archive_file, dir: archive_dir)
expect(result).to eq(true)
expect(File.exist?(archive_file)).to eq(true)
expect(File.exist?(File.join(archive_dir, 'project.json'))).to eq(true)
expect(Dir.exist?(File.join(archive_dir, 'uploads'))).to eq(true)
end
context 'when something goes wrong' do
it 'raises an error' do
expect(Gitlab::Popen).to receive(:popen).and_return(['Error', 1])
klass = Class.new do
include Gitlab::ImportExport::CommandLineUtil
end.new
expect { klass.untar_xf(archive: 'test', dir: 'test') }.to raise_error(Gitlab::ImportExport::Error, 'System call failed')
end
end
end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe BulkImports::ArchiveExtractionService do
let_it_be(:tmpdir) { Dir.mktmpdir }
let_it_be(:filename) { 'symlink_export.tar' }
let_it_be(:filepath) { File.join(tmpdir, filename) }
before do
FileUtils.copy_file(File.join('spec', 'fixtures', filename), filepath)
end
after(:all) do
FileUtils.remove_entry(tmpdir)
end
subject(:service) { described_class.new(tmpdir: tmpdir, filename: filename) }
describe '#execute' do
it 'extracts files from archive and removes symlinks' do
file = File.join(tmpdir, 'project.json')
folder = File.join(tmpdir, 'uploads')
symlink = File.join(tmpdir, 'uploads', 'link.gitignore')
expect(service).to receive(:untar_xf).with(archive: filepath, dir: tmpdir).and_call_original
service.execute
expect(File.exist?(file)).to eq(true)
expect(Dir.exist?(folder)).to eq(true)
expect(File.exist?(symlink)).to eq(false)
end
context 'when dir is not in tmpdir' do
it 'raises an error' do
['/etc', '/usr', '/', '/home', '', '/some/other/path', Rails.root].each do |path|
expect { described_class.new(tmpdir: path, filename: 'filename').execute }
.to raise_error(BulkImports::Error, 'Invalid target directory')
end
end
end
context 'when archive file is a symlink' do
it 'raises an error' do
FileUtils.ln_s(File.join(tmpdir, filename), File.join(tmpdir, 'symlink'))
expect { described_class.new(tmpdir: tmpdir, filename: 'symlink').execute }
.to raise_error(BulkImports::Error, 'Invalid file')
end
end
context 'when filepath is being traversed' do
it 'raises an error' do
expect { described_class.new(tmpdir: File.join(tmpdir, '../../../'), filename: 'name').execute }
.to raise_error(Gitlab::Utils::PathTraversalAttackError, 'Invalid path')
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment