Commit 7d06e7d7 authored by Matthias Käppler

Merge branch 'georgekoltsov/update-import-archive-file-validator' into 'master'

Update Import archive size validator

See merge request gitlab-org/gitlab!52893
parents b488e2f5 9e81a3a0
 # frozen_string_literal: true

-require 'zlib'

 module Gitlab
   module ImportExport
     class DecompressedArchiveSizeValidator
       include Gitlab::Utils::StrongMemoize

       DEFAULT_MAX_BYTES = 10.gigabytes.freeze
-      CHUNK_SIZE = 4096.freeze
+      TIMEOUT_LIMIT = 60.seconds
-      attr_reader :error

       def initialize(archive_path:, max_bytes: self.class.max_bytes)
         @archive_path = archive_path
         @max_bytes = max_bytes
-        @bytes_read = 0
-        @total_reads = 0
-        @denominator = 5
-        @error = nil
       end

       def valid?
@@ -31,59 +23,62 @@ module Gitlab
         DEFAULT_MAX_BYTES
       end

-      def archive_file
-        @archive_file ||= File.open(@archive_path)
-      end
       private

       def validate
-        until archive_file.eof?
-          compressed_chunk = archive_file.read(CHUNK_SIZE)
-          inflate_stream.inflate(compressed_chunk) do |chunk|
-            @bytes_read += chunk.size
-            @total_reads += 1
-          end
-          # Start garbage collection every 5 reads in order
-          # to prevent memory bloat during archive decompression
-          GC.start if gc_start?
-          if @bytes_read > @max_bytes
-            @error = error_message
-            return false
-          end
-        end
-        true
-      rescue => e
-        @error = error_message
-        Gitlab::ErrorTracking.track_exception(e)
-        Gitlab::Import::Logger.info(
-          message: @error,
-          error: e.message
-        )
-        false
-      ensure
-        inflate_stream.close
-        archive_file.close
-      end
-      def inflate_stream
-        @inflate_stream ||= Zlib::Inflate.new(Zlib::MAX_WBITS + 32)
-      end
-      def gc_start?
-        @total_reads % @denominator == 0
-      end
-      def error_message
-        _('Decompressed archive size validation failed.')
-      end
+        pgrp = nil
+        valid_archive = true
+        Timeout.timeout(TIMEOUT_LIMIT) do
+          stdin, stdout, stderr, wait_thr = Open3.popen3(command, pgroup: true)
+          stdin.close
+          pgrp = Process.getpgid(wait_thr[:pid])
+          status = wait_thr.value
+          if status.success?
+            result = stdout.readline
+            if result.to_i > @max_bytes
+              valid_archive = false
+              log_error('Decompressed archive size limit reached')
+            end
+          else
+            valid_archive = false
+            log_error(stderr.readline)
+          end
+        ensure
+          stdout.close
+          stderr.close
+        end
+        valid_archive
+      rescue Timeout::Error
+        log_error('Timeout reached during archive decompression')
+        Process.kill(-1, pgrp) if pgrp
+        false
+      rescue => e
+        log_error(e.message)
+        Process.kill(-1, pgrp) if pgrp
+        false
+      end
+      def command
+        "gzip -dc #{@archive_path} | wc -c"
+      end
+      def log_error(error)
+        Gitlab::Import::Logger.info(
+          message: error,
+          import_upload_archive_path: @archive_path,
+          import_upload_archive_size: File.size(@archive_path)
+        )
+      end
     end
   end
 end
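For readers skimming the diff, the core change is that decompressed size is now measured by shelling out rather than inflating the archive chunk by chunk in Ruby. Below is a minimal, self-contained sketch of that pattern, assuming a gzip-compressed archive; the method name decompressed_size_within_limit? and its parameters are illustrative only and are not part of the GitLab codebase:

require 'open3'
require 'timeout'

# Illustrative sketch: measure an archive's decompressed size without writing
# the extracted data to disk, mirroring the approach this commit adopts.
def decompressed_size_within_limit?(archive_path, max_bytes, timeout_seconds: 60)
  pgrp = nil

  Timeout.timeout(timeout_seconds) do
    # `gzip -dc` streams decompressed bytes to stdout and `wc -c` counts them,
    # so nothing is ever extracted onto disk.
    stdin, stdout, stderr, wait_thr = Open3.popen3("gzip -dc #{archive_path} | wc -c", pgroup: true)
    stdin.close
    # Remember the process group so a hung pipeline can be killed as a unit.
    pgrp = Process.getpgid(wait_thr[:pid])
    status = wait_thr.value

    begin
      return false unless status.success?

      stdout.readline.to_i <= max_bytes
    ensure
      stdout.close
      stderr.close
    end
  end
rescue Timeout::Error
  # SIGHUP the whole process group (shell, gzip and wc), not just the shell.
  Process.kill(-1, pgrp) if pgrp
  false
end

# Example use (path is a placeholder):
# decompressed_size_within_limit?('/tmp/export.tar.gz', 10 * 1024**3)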
@@ -87,7 +87,7 @@ module Gitlab
       end

       def validate_decompressed_archive_size
-        raise ImporterError.new(size_validator.error) unless size_validator.valid?
+        raise ImporterError.new(_('Decompressed archive size validation failed.')) unless size_validator.valid?
       end

       def size_validator
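For illustration, the call site after this change behaves roughly as sketched below. The snippet assumes the surrounding GitLab importer context (ActiveSupport, the gettext `_` helper, and the importer's ImporterError class), and the archive path is a placeholder:

# Sketch only: the validator no longer exposes `attr_reader :error`, so the
# caller raises its own generic message when validation fails.
validator = Gitlab::ImportExport::DecompressedArchiveSizeValidator.new(
  archive_path: '/tmp/project_export.tar.gz' # placeholder path
)

raise ImporterError.new(_('Decompressed archive size validation failed.')) unless validator.valid?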
@@ -27,25 +27,55 @@ RSpec.describe Gitlab::ImportExport::DecompressedArchiveSizeValidator do
   end

   context 'when file exceeds allowed decompressed size' do
-    it 'returns false' do
+    it 'logs error message returns false' do
+      expect(Gitlab::Import::Logger)
+        .to receive(:info)
+        .with(
+          import_upload_archive_path: filepath,
+          import_upload_archive_size: File.size(filepath),
+          message: 'Decompressed archive size limit reached'
+        )
+
       expect(subject.valid?).to eq(false)
     end
   end

-  context 'when something goes wrong during decompression' do
-    before do
-      allow(subject.archive_file).to receive(:eof?).and_raise(StandardError)
-    end
-
-    it 'logs and tracks raised exception' do
-      expect(Gitlab::ErrorTracking).to receive(:track_exception).with(instance_of(StandardError))
-      expect(Gitlab::Import::Logger).to receive(:info).with(hash_including(message: 'Decompressed archive size validation failed.'))
-
-      subject.valid?
-    end
-
-    it 'returns false' do
-      expect(subject.valid?).to eq(false)
-    end
-  end
+  context 'when exception occurs during decompression' do
+    shared_examples 'logs raised exception and terminates validator process group' do
+      let(:std) { double(:std, close: nil, value: nil) }
+      let(:wait_thr) { double }
+
+      before do
+        allow(Process).to receive(:getpgid).and_return(2)
+        allow(Open3).to receive(:popen3).and_return([std, std, std, wait_thr])
+        allow(wait_thr).to receive(:[]).with(:pid).and_return(1)
+        allow(wait_thr).to receive(:value).and_raise(exception)
+      end
+
+      it 'logs raised exception and terminates validator process group' do
+        expect(Gitlab::Import::Logger)
+          .to receive(:info)
+          .with(
+            import_upload_archive_path: filepath,
+            import_upload_archive_size: File.size(filepath),
+            message: error_message
+          )
+        expect(Process).to receive(:kill).with(-1, 2)
+        expect(subject.valid?).to eq(false)
+      end
+    end
+
+    context 'when timeout occurs' do
+      let(:error_message) { 'Timeout reached during archive decompression' }
+      let(:exception) { Timeout::Error }
+
+      include_examples 'logs raised exception and terminates validator process group'
+    end
+
+    context 'when exception occurs' do
+      let(:error_message) { 'Error!' }
+      let(:exception) { StandardError.new(error_message) }
+
+      include_examples 'logs raised exception and terminates validator process group'
+    end
   end
 end