Commit e188f963 authored by Markus Koller's avatar Markus Koller

Merge branch 'georgekoltsov/update-bulk-import-services-tmpdir' into 'master'

Refactor BulkImports tmpdir variable & update few validations

See merge request gitlab-org/gitlab!78113
parents f43c7bfd b312c776
......@@ -28,8 +28,8 @@ module BulkImports
end
def execute
validate_filepath
validate_tmpdir
validate_filepath
validate_symlink
extract_archive
......@@ -46,7 +46,7 @@ module BulkImports
end
def validate_tmpdir
raise(BulkImports::Error, 'Invalid target directory') unless File.expand_path(tmpdir).start_with?(Dir.tmpdir)
Gitlab::Utils.check_allowed_absolute_path!(tmpdir, [Dir.tmpdir])
end
def validate_symlink
......
# frozen_string_literal: true
# File Decompression Service allows gzipped files decompression into tmp directory.
#
# @param tmpdir [String] Temp directory to store downloaded file to. Must be located under `Dir.tmpdir`.
# @param filename [String] Name of the file to decompress.
module BulkImports
class FileDecompressionService
include Gitlab::ImportExport::CommandLineUtil
ServiceError = Class.new(StandardError)
def initialize(dir:, filename:)
@dir = dir
def initialize(tmpdir:, filename:)
@tmpdir = tmpdir
@filename = filename
@filepath = File.join(@dir, @filename)
@filepath = File.join(@tmpdir, @filename)
@decompressed_filename = File.basename(@filename, '.gz')
@decompressed_filepath = File.join(@dir, @decompressed_filename)
@decompressed_filepath = File.join(@tmpdir, @decompressed_filename)
end
def execute
validate_dir
validate_tmpdir
validate_filepath
validate_decompressed_file_size if Feature.enabled?(:validate_import_decompressed_archive_size, default_enabled: :yaml)
validate_symlink(filepath)
......@@ -33,10 +38,14 @@ module BulkImports
private
attr_reader :dir, :filename, :filepath, :decompressed_filename, :decompressed_filepath
attr_reader :tmpdir, :filename, :filepath, :decompressed_filename, :decompressed_filepath
def validate_dir
raise(ServiceError, 'Invalid target directory') unless dir.start_with?(Dir.tmpdir)
def validate_filepath
Gitlab::Utils.check_path_traversal!(filepath)
end
def validate_tmpdir
Gitlab::Utils.check_allowed_absolute_path!(tmpdir, [Dir.tmpdir])
end
def validate_decompressed_file_size
......@@ -48,7 +57,7 @@ module BulkImports
end
def decompress_file
gunzip(dir: dir, filename: filename)
gunzip(dir: tmpdir, filename: filename)
end
def size_validator
......
# frozen_string_literal: true
# Downloads a remote file. If no filename is given, it'll use the remote filename
# File Download Service allows remote file download into tmp directory.
#
# @param configuration [BulkImports::Configuration] Config object containing url and access token
# @param relative_url [String] Relative URL to download the file from
# @param tmpdir [String] Temp directory to store downloaded file to. Must be located under `Dir.tmpdir`.
# @param file_size_limit [Integer] Maximum allowed file size
# @param allowed_content_types [Array<String>] Allowed file content types
# @param filename [String] Name of the file to download, if known. Use remote filename if none given.
module BulkImports
class FileDownloadService
ServiceError = Class.new(StandardError)
......@@ -13,20 +20,21 @@ module BulkImports
def initialize(
configuration:,
relative_url:,
dir:,
tmpdir:,
file_size_limit: DEFAULT_FILE_SIZE_LIMIT,
allowed_content_types: DEFAULT_ALLOWED_CONTENT_TYPES,
filename: nil)
@configuration = configuration
@relative_url = relative_url
@filename = filename
@dir = dir
@tmpdir = tmpdir
@file_size_limit = file_size_limit
@allowed_content_types = allowed_content_types
end
def execute
validate_dir
validate_tmpdir
validate_filepath
validate_url
validate_content_type
validate_content_length
......@@ -40,7 +48,7 @@ module BulkImports
private
attr_reader :configuration, :relative_url, :dir, :file_size_limit, :allowed_content_types
attr_reader :configuration, :relative_url, :tmpdir, :file_size_limit, :allowed_content_types
def download_file
File.open(filepath, 'wb') do |file|
......@@ -76,8 +84,12 @@ module BulkImports
@headers ||= http_client.head(relative_url).headers
end
def validate_dir
raise(ServiceError, 'Invalid target directory') unless dir.start_with?(Dir.tmpdir)
def validate_filepath
Gitlab::Utils.check_path_traversal!(filepath)
end
def validate_tmpdir
Gitlab::Utils.check_allowed_absolute_path!(tmpdir, [Dir.tmpdir])
end
def validate_symlink
......@@ -119,7 +131,7 @@ module BulkImports
end
def filepath
@filepath ||= File.join(@dir, filename)
@filepath ||= File.join(@tmpdir, filename)
end
def filename
......
......@@ -4,49 +4,47 @@ module BulkImports
module Common
module Extractors
class NdjsonExtractor
include Gitlab::ImportExport::CommandLineUtil
include Gitlab::Utils::StrongMemoize
def initialize(relation:)
@relation = relation
@tmp_dir = Dir.mktmpdir
@tmpdir = Dir.mktmpdir
end
def extract(context)
download_service(tmp_dir, context).execute
decompression_service(tmp_dir).execute
relations = ndjson_reader(tmp_dir).consume_relation('', relation)
download_service(context).execute
decompression_service.execute
records = ndjson_reader.consume_relation('', relation)
BulkImports::Pipeline::ExtractedData.new(data: relations)
BulkImports::Pipeline::ExtractedData.new(data: records)
end
def remove_tmp_dir
FileUtils.remove_entry(tmp_dir)
def remove_tmpdir
FileUtils.remove_entry(tmpdir) if Dir.exist?(tmpdir)
end
private
attr_reader :relation, :tmp_dir
attr_reader :relation, :tmpdir
def filename
@filename ||= "#{relation}.ndjson.gz"
"#{relation}.ndjson.gz"
end
def download_service(tmp_dir, context)
def download_service(context)
@download_service ||= BulkImports::FileDownloadService.new(
configuration: context.configuration,
relative_url: context.entity.relation_download_url_path(relation),
dir: tmp_dir,
tmpdir: tmpdir,
filename: filename
)
end
def decompression_service(tmp_dir)
@decompression_service ||= BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: filename)
def decompression_service
@decompression_service ||= BulkImports::FileDecompressionService.new(tmpdir: tmpdir, filename: filename)
end
def ndjson_reader(tmp_dir)
@ndjson_reader ||= Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir)
def ndjson_reader
@ndjson_reader ||= Gitlab::ImportExport::Json::NdjsonReader.new(tmpdir)
end
end
end
......
......@@ -15,7 +15,7 @@ module BulkImports
decompression_service.execute
extraction_service.execute
upload_file_paths = Dir.glob(File.join(tmp_dir, '**', '*'))
upload_file_paths = Dir.glob(File.join(tmpdir, '**', '*'))
BulkImports::Pipeline::ExtractedData.new(data: upload_file_paths)
end
......@@ -37,7 +37,7 @@ module BulkImports
end
def after_run(_)
FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
FileUtils.remove_entry(tmpdir) if Dir.exist?(tmpdir)
end
private
......@@ -46,17 +46,17 @@ module BulkImports
BulkImports::FileDownloadService.new(
configuration: context.configuration,
relative_url: context.entity.relation_download_url_path(relation),
dir: tmp_dir,
tmpdir: tmpdir,
filename: targz_filename
)
end
def decompression_service
BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: targz_filename)
BulkImports::FileDecompressionService.new(tmpdir: tmpdir, filename: targz_filename)
end
def extraction_service
BulkImports::ArchiveExtractionService.new(tmpdir: tmp_dir, filename: tar_filename)
BulkImports::ArchiveExtractionService.new(tmpdir: tmpdir, filename: tar_filename)
end
def relation
......@@ -71,8 +71,8 @@ module BulkImports
"#{tar_filename}.gz"
end
def tmp_dir
@tmp_dir ||= Dir.mktmpdir('bulk_imports')
def tmpdir
@tmpdir ||= Dir.mktmpdir('bulk_imports')
end
def file_uploader
......
......@@ -68,7 +68,7 @@ module BulkImports
end
def after_run(_)
extractor.remove_tmp_dir if extractor.respond_to?(:remove_tmp_dir)
extractor.remove_tmpdir if extractor.respond_to?(:remove_tmpdir)
end
def relation_class(relation_key)
......
......@@ -8,15 +8,16 @@ module BulkImports
transformer ::BulkImports::Common::Transformers::ProhibitedAttributesTransformer
def extract(context)
download_service(tmp_dir, context).execute
decompression_service(tmp_dir).execute
def extract(_context)
download_service.execute
decompression_service.execute
project_attributes = json_decode(json_attributes)
BulkImports::Pipeline::ExtractedData.new(data: project_attributes)
end
def transform(_, data)
def transform(_context, data)
subrelations = config.portable_relations_tree.keys.map(&:to_s)
Gitlab::ImportExport::AttributeCleaner.clean(
......@@ -26,42 +27,42 @@ module BulkImports
).except(*subrelations)
end
def load(_, data)
def load(_context, data)
portable.assign_attributes(data)
portable.reconcile_shared_runners_setting!
portable.drop_visibility_level!
portable.save!
end
def after_run(_)
FileUtils.remove_entry(tmp_dir)
def after_run(_context)
FileUtils.remove_entry(tmpdir) if Dir.exist?(tmpdir)
end
def json_attributes
@json_attributes ||= File.read(File.join(tmp_dir, filename))
@json_attributes ||= File.read(File.join(tmpdir, filename))
end
private
def tmp_dir
@tmp_dir ||= Dir.mktmpdir
def tmpdir
@tmpdir ||= Dir.mktmpdir('bulk_imports')
end
def config
@config ||= BulkImports::FileTransfer.config_for(portable)
end
def download_service(tmp_dir, context)
def download_service
@download_service ||= BulkImports::FileDownloadService.new(
configuration: context.configuration,
relative_url: context.entity.relation_download_url_path(BulkImports::FileTransfer::BaseConfig::SELF_RELATION),
dir: tmp_dir,
relative_url: context.entity.relation_download_url_path(BulkImports::FileTransfer::BaseConfig::SELF_RELATION),
tmpdir: tmpdir,
filename: compressed_filename
)
end
def decompression_service(tmp_dir)
@decompression_service ||= BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: compressed_filename)
def decompression_service
@decompression_service ||= BulkImports::FileDecompressionService.new(tmpdir: tmpdir, filename: compressed_filename)
end
def compressed_filename
......
......@@ -16,7 +16,7 @@ RSpec.describe BulkImports::Common::Extractors::NdjsonExtractor do
before do
allow(FileUtils).to receive(:remove_entry).with(any_args).and_call_original
subject.instance_variable_set(:@tmp_dir, tmpdir)
subject.instance_variable_set(:@tmpdir, tmpdir)
end
after(:all) do
......@@ -43,11 +43,11 @@ RSpec.describe BulkImports::Common::Extractors::NdjsonExtractor do
end
end
describe '#remove_tmp_dir' do
describe '#remove_tmpdir' do
it 'removes tmp dir' do
expect(FileUtils).to receive(:remove_entry).with(tmpdir).once
subject.remove_tmp_dir
subject.remove_tmpdir
end
end
end
......@@ -3,10 +3,10 @@
require 'spec_helper'
RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
let_it_be(:tmpdir) { Dir.mktmpdir }
let_it_be(:project) { create(:project) }
let_it_be(:group) { create(:group) }
let(:tmpdir) { Dir.mktmpdir }
let(:uploads_dir_path) { File.join(tmpdir, '72a497a02fe3ee09edae2ed06d390038') }
let(:upload_file_path) { File.join(uploads_dir_path, 'upload.txt')}
let(:tracker) { create(:bulk_import_tracker, entity: entity) }
......@@ -80,10 +80,10 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
.with(
configuration: context.configuration,
relative_url: "/#{entity.pluralized_name}/test/export_relations/download?relation=uploads",
dir: tmpdir,
tmpdir: tmpdir,
filename: 'uploads.tar.gz')
.and_return(download_service)
expect(BulkImports::FileDecompressionService).to receive(:new).with(dir: tmpdir, filename: 'uploads.tar.gz').and_return(decompression_service)
expect(BulkImports::FileDecompressionService).to receive(:new).with(tmpdir: tmpdir, filename: 'uploads.tar.gz').and_return(decompression_service)
expect(BulkImports::ArchiveExtractionService).to receive(:new).with(tmpdir: tmpdir, filename: 'uploads.tar').and_return(extraction_service)
expect(download_service).to receive(:execute)
......@@ -123,6 +123,31 @@ RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
end
end
end
describe '#after_run' do
before do
allow(Dir).to receive(:mktmpdir).with('bulk_imports').and_return(tmpdir)
end
it 'removes tmp dir' do
allow(FileUtils).to receive(:remove_entry).and_call_original
expect(FileUtils).to receive(:remove_entry).with(tmpdir).and_call_original
pipeline.after_run(nil)
expect(Dir.exist?(tmpdir)).to eq(false)
end
context 'when dir does not exist' do
it 'does not attempt to remove tmpdir' do
FileUtils.remove_entry(tmpdir)
expect(FileUtils).not_to receive(:remove_entry).with(tmpdir)
pipeline.after_run(nil)
end
end
end
end
context 'when importing to group' do
......
......@@ -56,7 +56,7 @@ RSpec.describe BulkImports::Projects::Pipelines::ProjectAttributesPipeline do
subject(:pipeline) { described_class.new(context) }
before do
allow(Dir).to receive(:mktmpdir).and_return(tmpdir)
allow(Dir).to receive(:mktmpdir).with('bulk_imports').and_return(tmpdir)
end
after do
......@@ -95,13 +95,13 @@ RSpec.describe BulkImports::Projects::Pipelines::ProjectAttributesPipeline do
.with(
configuration: context.configuration,
relative_url: "/#{entity.pluralized_name}/#{entity.source_full_path}/export_relations/download?relation=self",
dir: tmpdir,
tmpdir: tmpdir,
filename: 'self.json.gz')
.and_return(file_download_service)
expect(BulkImports::FileDecompressionService)
.to receive(:new)
.with(dir: tmpdir, filename: 'self.json.gz')
.with(tmpdir: tmpdir, filename: 'self.json.gz')
.and_return(file_decompression_service)
expect(file_download_service).to receive(:execute)
......@@ -156,4 +156,25 @@ RSpec.describe BulkImports::Projects::Pipelines::ProjectAttributesPipeline do
pipeline.json_attributes
end
end
describe '#after_run' do
it 'removes tmp dir' do
allow(FileUtils).to receive(:remove_entry).and_call_original
expect(FileUtils).to receive(:remove_entry).with(tmpdir).and_call_original
pipeline.after_run(nil)
expect(Dir.exist?(tmpdir)).to eq(false)
end
context 'when dir does not exist' do
it 'does not attempt to remove tmpdir' do
FileUtils.remove_entry(tmpdir)
expect(FileUtils).not_to receive(:remove_entry).with(tmpdir)
pipeline.after_run(nil)
end
end
end
end
......@@ -34,9 +34,9 @@ RSpec.describe BulkImports::ArchiveExtractionService do
context 'when dir is not in tmpdir' do
it 'raises an error' do
['/etc', '/usr', '/', '/home', '', '/some/other/path', Rails.root].each do |path|
['/etc', '/usr', '/', '/home', '/some/other/path', Rails.root.to_s].each do |path|
expect { described_class.new(tmpdir: path, filename: 'filename').execute }
.to raise_error(BulkImports::Error, 'Invalid target directory')
.to raise_error(StandardError, "path #{path} is not allowed")
end
end
end
......@@ -52,7 +52,7 @@ RSpec.describe BulkImports::ArchiveExtractionService do
context 'when filepath is being traversed' do
it 'raises an error' do
expect { described_class.new(tmpdir: File.join(tmpdir, '../../../'), filename: 'name').execute }
expect { described_class.new(tmpdir: File.join(Dir.mktmpdir, 'test', '..'), filename: 'name').execute }
.to raise_error(Gitlab::Utils::PathTraversalAttackError, 'Invalid path')
end
end
......
......@@ -18,7 +18,7 @@ RSpec.describe BulkImports::FileDecompressionService do
FileUtils.remove_entry(tmpdir)
end
subject { described_class.new(dir: tmpdir, filename: gz_filename) }
subject { described_class.new(tmpdir: tmpdir, filename: gz_filename) }
describe '#execute' do
it 'decompresses specified file' do
......@@ -55,10 +55,18 @@ RSpec.describe BulkImports::FileDecompressionService do
end
context 'when dir is not in tmpdir' do
subject { described_class.new(dir: '/etc', filename: 'filename') }
subject { described_class.new(tmpdir: '/etc', filename: 'filename') }
it 'raises an error' do
expect { subject.execute }.to raise_error(described_class::ServiceError, 'Invalid target directory')
expect { subject.execute }.to raise_error(StandardError, 'path /etc is not allowed')
end
end
context 'when path is being traversed' do
subject { described_class.new(tmpdir: File.join(Dir.mktmpdir, 'test', '..'), filename: 'filename') }
it 'raises an error' do
expect { subject.execute }.to raise_error(Gitlab::Utils::PathTraversalAttackError, 'Invalid path')
end
end
......@@ -69,7 +77,7 @@ RSpec.describe BulkImports::FileDecompressionService do
FileUtils.ln_s(File.join(tmpdir, gz_filename), symlink)
end
subject { described_class.new(dir: tmpdir, filename: 'symlink.gz') }
subject { described_class.new(tmpdir: tmpdir, filename: 'symlink.gz') }
it 'raises an error and removes the file' do
expect { subject.execute }.to raise_error(described_class::ServiceError, 'Invalid file')
......@@ -87,7 +95,7 @@ RSpec.describe BulkImports::FileDecompressionService do
subject.instance_variable_set(:@decompressed_filepath, symlink)
end
subject { described_class.new(dir: tmpdir, filename: gz_filename) }
subject { described_class.new(tmpdir: tmpdir, filename: gz_filename) }
it 'raises an error and removes the file' do
expect { subject.execute }.to raise_error(described_class::ServiceError, 'Invalid file')
......
......@@ -33,7 +33,7 @@ RSpec.describe BulkImports::FileDownloadService do
described_class.new(
configuration: config,
relative_url: '/test',
dir: tmpdir,
tmpdir: tmpdir,
filename: filename,
file_size_limit: file_size_limit,
allowed_content_types: allowed_content_types
......@@ -72,7 +72,7 @@ RSpec.describe BulkImports::FileDownloadService do
service = described_class.new(
configuration: double,
relative_url: '/test',
dir: tmpdir,
tmpdir: tmpdir,
filename: filename,
file_size_limit: file_size_limit,
allowed_content_types: allowed_content_types
......@@ -157,7 +157,7 @@ RSpec.describe BulkImports::FileDownloadService do
described_class.new(
configuration: config,
relative_url: '/test',
dir: tmpdir,
tmpdir: tmpdir,
filename: 'symlink',
file_size_limit: file_size_limit,
allowed_content_types: allowed_content_types
......@@ -179,7 +179,7 @@ RSpec.describe BulkImports::FileDownloadService do
described_class.new(
configuration: config,
relative_url: '/test',
dir: '/etc',
tmpdir: '/etc',
filename: filename,
file_size_limit: file_size_limit,
allowed_content_types: allowed_content_types
......@@ -188,8 +188,28 @@ RSpec.describe BulkImports::FileDownloadService do
it 'raises an error' do
expect { subject.execute }.to raise_error(
described_class::ServiceError,
'Invalid target directory'
StandardError,
'path /etc is not allowed'
)
end
end
context 'when dir path is being traversed' do
subject do
described_class.new(
configuration: config,
relative_url: '/test',
tmpdir: File.join(Dir.mktmpdir('bulk_imports'), 'test', '..'),
filename: filename,
file_size_limit: file_size_limit,
allowed_content_types: allowed_content_types
)
end
it 'raises an error' do
expect { subject.execute }.to raise_error(
Gitlab::Utils::PathTraversalAttackError,
'Invalid path'
)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment