Commit c2da24f8 authored by George Koltsov's avatar George Koltsov Committed by GitLab Release Tools Bot

Update BulkImports UploadsPipeline to filter & ignore file symlinks

parent e30c2a00
# frozen_string_literal: true
# Archive Extraction Service allows extraction of contents
# from `tar` archives with an additional handling (removal)
# of file symlinks.
#
# @param tmpdir [String] A path where archive is located
# and where its contents are extracted.
# Tmpdir directory must be located under `Dir.tmpdir`.
# `BulkImports::Error` is raised if any other directory path is used.
#
# @param filename [String] Name of the file to extract contents from.
#
# @example
# dir = Dir.mktmpdir
# filename = 'things.tar'
# BulkImports::ArchiveExtractionService.new(tmpdir: dir, filename: filename).execute
# Dir.glob(File.join(dir, '**', '*'))
# => ['/path/to/tmp/dir/extracted_file_1', '/path/to/tmp/dir/extracted_file_2', '/path/to/tmp/dir/extracted_file_3']
module BulkImports
class ArchiveExtractionService
include Gitlab::ImportExport::CommandLineUtil
def initialize(tmpdir:, filename:)
@tmpdir = tmpdir
@filename = filename
@filepath = File.join(@tmpdir, @filename)
end
def execute
validate_filepath
validate_tmpdir
validate_symlink
extract_archive
remove_symlinks
tmpdir
end
private
attr_reader :tmpdir, :filename, :filepath
def validate_filepath
Gitlab::Utils.check_path_traversal!(filepath)
end
def validate_tmpdir
raise(BulkImports::Error, 'Invalid target directory') unless File.expand_path(tmpdir).start_with?(Dir.tmpdir)
end
def validate_symlink
raise(BulkImports::Error, 'Invalid file') if symlink?(filepath)
end
def symlink?(filepath)
File.lstat(filepath).symlink?
end
def extract_archive
untar_xf(archive: filepath, dir: tmpdir)
end
def extracted_files
Dir.glob(File.join(tmpdir, '**', '*'))
end
def remove_symlinks
extracted_files.each do |path|
FileUtils.rm(path) if symlink?(path)
end
end
end
end
......@@ -5,16 +5,16 @@ module BulkImports
module Pipelines
class UploadsPipeline
include Pipeline
include Gitlab::ImportExport::CommandLineUtil
FILENAME = 'uploads.tar.gz'
AVATAR_PATTERN = %r{.*\/#{BulkImports::UploadsExportService::AVATAR_PATH}\/(?<identifier>.*)}.freeze
AvatarLoadingError = Class.new(StandardError)
def extract(context)
download_service(tmp_dir, context).execute
untar_zxf(archive: File.join(tmp_dir, FILENAME), dir: tmp_dir)
def extract(_context)
download_service.execute
decompression_service.execute
extraction_service.execute
upload_file_paths = Dir.glob(File.join(tmp_dir, '**', '*'))
BulkImports::Pipeline::ExtractedData.new(data: upload_file_paths)
......@@ -29,6 +29,7 @@ module BulkImports
return unless dynamic_path
return if File.directory?(file_path)
return if File.lstat(file_path).symlink?
named_captures = dynamic_path.named_captures.symbolize_keys
......@@ -36,20 +37,40 @@ module BulkImports
end
def after_run(_)
FileUtils.remove_entry(tmp_dir)
FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
end
private
def download_service(tmp_dir, context)
def download_service
BulkImports::FileDownloadService.new(
configuration: context.configuration,
relative_url: context.entity.relation_download_url_path('uploads'),
relative_url: context.entity.relation_download_url_path(relation),
dir: tmp_dir,
filename: FILENAME
filename: targz_filename
)
end
def decompression_service
BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: targz_filename)
end
def extraction_service
BulkImports::ArchiveExtractionService.new(tmpdir: tmp_dir, filename: tar_filename)
end
def relation
BulkImports::FileTransfer::BaseConfig::UPLOADS_RELATION
end
def tar_filename
"#{relation}.tar"
end
def targz_filename
"#{tar_filename}.gz"
end
def tmp_dir
@tmp_dir ||= Dir.mktmpdir('bulk_imports')
end
......
......@@ -18,6 +18,10 @@ module Gitlab
tar_with_options(archive: archive, dir: dir, options: 'cf')
end
def untar_xf(archive:, dir:)
untar_with_options(archive: archive, dir: dir, options: 'xf')
end
def gzip(dir:, filename:)
gzip_with_options(dir: dir, filename: filename)
end
......
......@@ -4,6 +4,12 @@ require 'spec_helper'
RSpec.describe 'ActionCableSubscriptionAdapterIdentifier override' do
describe '#identifier' do
let!(:original_config) { ::ActionCable::Server::Base.config.cable }
after do
::ActionCable::Server::Base.config.cable = original_config
end
context 'when id key is nil on cable.yml' do
it 'does not override server config id with action cable pid' do
config = {
......
......@@ -70,8 +70,10 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
describe '#extract' do
it 'downloads & extracts upload paths' do
allow(Dir).to receive(:mktmpdir).and_return(tmpdir)
expect(pipeline).to receive(:untar_zxf)
file_download_service = instance_double("BulkImports::FileDownloadService")
download_service = instance_double("BulkImports::FileDownloadService")
decompression_service = instance_double("BulkImports::FileDecompressionService")
extraction_service = instance_double("BulkImports::ArchiveExtractionService")
expect(BulkImports::FileDownloadService)
.to receive(:new)
......@@ -80,9 +82,13 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
relative_url: "/#{entity.pluralized_name}/test/export_relations/download?relation=uploads",
dir: tmpdir,
filename: 'uploads.tar.gz')
.and_return(file_download_service)
.and_return(download_service)
expect(BulkImports::FileDecompressionService).to receive(:new).with(dir: tmpdir, filename: 'uploads.tar.gz').and_return(decompression_service)
expect(BulkImports::ArchiveExtractionService).to receive(:new).with(tmpdir: tmpdir, filename: 'uploads.tar').and_return(extraction_service)
expect(file_download_service).to receive(:execute)
expect(download_service).to receive(:execute)
expect(decompression_service).to receive(:execute)
expect(extraction_service).to receive(:execute)
extracted_data = pipeline.extract(context)
......@@ -106,6 +112,16 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
expect { pipeline.load(context, uploads_dir_path) }.not_to change { portable.uploads.count }
end
end
context 'when path is a symlink' do
it 'does not upload the file' do
symlink = File.join(tmpdir, 'symlink')
FileUtils.ln_s(File.join(tmpdir, upload_file_path), symlink)
expect { pipeline.load(context, symlink) }.not_to change { portable.uploads.count }
end
end
end
end
......
......@@ -101,4 +101,39 @@ RSpec.describe Gitlab::ImportExport::CommandLineUtil do
end
end
end
describe '#untar_xf' do
let(:archive_dir) { Dir.mktmpdir }
after do
FileUtils.remove_entry(archive_dir)
end
it 'extracts archive without decompression' do
filename = 'archive.tar.gz'
archive_file = File.join(archive_dir, 'archive.tar')
FileUtils.copy_file(archive, File.join(archive_dir, filename))
subject.gunzip(dir: archive_dir, filename: filename)
result = subject.untar_xf(archive: archive_file, dir: archive_dir)
expect(result).to eq(true)
expect(File.exist?(archive_file)).to eq(true)
expect(File.exist?(File.join(archive_dir, 'project.json'))).to eq(true)
expect(Dir.exist?(File.join(archive_dir, 'uploads'))).to eq(true)
end
context 'when something goes wrong' do
it 'raises an error' do
expect(Gitlab::Popen).to receive(:popen).and_return(['Error', 1])
klass = Class.new do
include Gitlab::ImportExport::CommandLineUtil
end.new
expect { klass.untar_xf(archive: 'test', dir: 'test') }.to raise_error(Gitlab::ImportExport::Error, 'System call failed')
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe BulkImports::ArchiveExtractionService do
let_it_be(:tmpdir) { Dir.mktmpdir }
let_it_be(:filename) { 'symlink_export.tar' }
let_it_be(:filepath) { File.join(tmpdir, filename) }
before do
FileUtils.copy_file(File.join('spec', 'fixtures', filename), filepath)
end
after(:all) do
FileUtils.remove_entry(tmpdir)
end
subject(:service) { described_class.new(tmpdir: tmpdir, filename: filename) }
describe '#execute' do
it 'extracts files from archive and removes symlinks' do
file = File.join(tmpdir, 'project.json')
folder = File.join(tmpdir, 'uploads')
symlink = File.join(tmpdir, 'uploads', 'link.gitignore')
expect(service).to receive(:untar_xf).with(archive: filepath, dir: tmpdir).and_call_original
service.execute
expect(File.exist?(file)).to eq(true)
expect(Dir.exist?(folder)).to eq(true)
expect(File.exist?(symlink)).to eq(false)
end
context 'when dir is not in tmpdir' do
it 'raises an error' do
['/etc', '/usr', '/', '/home', '', '/some/other/path', Rails.root].each do |path|
expect { described_class.new(tmpdir: path, filename: 'filename').execute }
.to raise_error(BulkImports::Error, 'Invalid target directory')
end
end
end
context 'when archive file is a symlink' do
it 'raises an error' do
FileUtils.ln_s(File.join(tmpdir, filename), File.join(tmpdir, 'symlink'))
expect { described_class.new(tmpdir: tmpdir, filename: 'symlink').execute }
.to raise_error(BulkImports::Error, 'Invalid file')
end
end
context 'when filepath is being traversed' do
it 'raises an error' do
expect { described_class.new(tmpdir: File.join(tmpdir, '../../../'), filename: 'name').execute }
.to raise_error(Gitlab::Utils::PathTraversalAttackError, 'Invalid path')
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment