Commit 6f6b484a authored by Alessio Caiazza's avatar Alessio Caiazza

Don't write on shared FS when OS in enabled

Workhorse dumps uploaded files to a shared file system because rails
sends a local path with the authorization answer.

This commit removes that filed if object storage and direct upload are
enabled.
parent d8f23f51
......@@ -180,10 +180,11 @@ module ObjectStorage
end
def workhorse_authorize(has_length:, maximum_size: nil)
{
RemoteObject: workhorse_remote_upload_options(has_length: has_length, maximum_size: maximum_size),
TempPath: workhorse_local_upload_path
}.compact
if self.object_store_enabled? && self.direct_upload_enabled?
{ RemoteObject: workhorse_remote_upload_options(has_length: has_length, maximum_size: maximum_size) }
else
{ TempPath: workhorse_local_upload_path }
end
end
def workhorse_local_upload_path
......
---
title: Avoid dumping files on disk when direct_upload is enabled
merge_request: 18135
author:
type: performance
......@@ -6,6 +6,7 @@ require "fileutils"
class UploadedFile
InvalidPathError = Class.new(StandardError)
UnknownSizeError = Class.new(StandardError)
# The filename, *not* including the path, of the "uploaded" file
attr_reader :original_filename
......@@ -18,37 +19,50 @@ class UploadedFile
attr_reader :remote_id
attr_reader :sha256
attr_reader :size
def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil)
def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil, size: nil)
if path.present?
raise InvalidPathError, "#{path} file does not exist" unless ::File.exist?(path)
@tempfile = File.new(path, 'rb')
@size = @tempfile.size
else
begin
@size = Integer(size)
rescue ArgumentError, TypeError
raise UnknownSizeError, 'Unable to determine file size'
end
end
@content_type = content_type
@original_filename = sanitize_filename(filename || path)
@original_filename = sanitize_filename(filename || path || '')
@content_type = content_type
@sha256 = sha256
@remote_id = remote_id
@tempfile = File.new(path, 'rb')
end
def self.from_params(params, field, upload_paths)
unless params["#{field}.path"]
raise InvalidPathError, "file is invalid" if params["#{field}.remote_id"]
path = params["#{field}.path"]
remote_id = params["#{field}.remote_id"]
return if path.blank? && remote_id.blank?
return
end
file_path = File.realpath(params["#{field}.path"])
file_path = nil
if path
file_path = File.realpath(path)
paths = Array(upload_paths) << Dir.tmpdir
unless self.allowed_path?(file_path, paths.compact)
raise InvalidPathError, "insecure path used '#{file_path}'"
end
end
UploadedFile.new(file_path,
filename: params["#{field}.name"],
content_type: params["#{field}.type"] || 'application/octet-stream',
sha256: params["#{field}.sha256"],
remote_id: params["#{field}.remote_id"])
remote_id: remote_id,
size: params["#{field}.size"])
end
def self.allowed_path?(file_path, paths)
......@@ -68,7 +82,11 @@ class UploadedFile
end
def path
@tempfile.path
@tempfile&.path
end
def close
@tempfile&.close
end
alias_method :local_path, :path
......
......@@ -72,16 +72,6 @@ describe UploadedFile do
end
end
context 'when only remote id is specified' do
let(:params) do
{ 'file.remote_id' => 'remote_id' }
end
it "raises an error" do
expect { subject }.to raise_error(UploadedFile::InvalidPathError, /file is invalid/)
end
end
context 'when verifying allowed paths' do
let(:params) do
{ 'file.path' => temp_file.path }
......@@ -120,6 +110,52 @@ describe UploadedFile do
end
end
describe '.initialize' do
context 'when no size is provided' do
it 'determine size from local path' do
file = described_class.new(temp_file.path)
expect(file.size).to eq(temp_file.size)
end
it 'raises an exception if is a remote file' do
expect do
described_class.new(nil, remote_id: 'id')
end.to raise_error(UploadedFile::UnknownSizeError, 'Unable to determine file size')
end
end
context 'when size is a number' do
let_it_be(:size) { 1.gigabyte }
it 'is overridden by the size of the local file' do
file = described_class.new(temp_file.path, size: size)
expect(file.size).to eq(temp_file.size)
end
it 'is respected if is a remote file' do
file = described_class.new(nil, remote_id: 'id', size: size)
expect(file.size).to eq(size)
end
end
context 'when size is a string' do
it 'is converted to a number' do
file = described_class.new(nil, remote_id: 'id', size: '1')
expect(file.size).to eq(1)
end
it 'raises an exception if does not represent a number' do
expect do
described_class.new(nil, remote_id: 'id', size: 'not a number')
end.to raise_error(UploadedFile::UnknownSizeError, 'Unable to determine file size')
end
end
end
describe '#sanitize_filename' do
it { expect(described_class.new(temp_file.path).sanitize_filename('spaced name')).to eq('spaced_name') }
it { expect(described_class.new(temp_file.path).sanitize_filename('#$%^&')).to eq('_____') }
......
......@@ -3,6 +3,7 @@ require 'spec_helper'
describe API::Runner, :clean_gitlab_redis_shared_state do
include StubGitlabCalls
include RedisHelpers
include WorkhorseHelpers
let(:registration_token) { 'abcdefg123456' }
......@@ -1395,7 +1396,7 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
expect(response).to have_gitlab_http_status(200)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
expect(json_response['TempPath']).to eq(JobArtifactUploader.workhorse_local_upload_path)
expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL')
......@@ -1562,15 +1563,16 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let!(:fog_connection) do
stub_artifacts_object_storage(direct_upload: true)
end
before do
let(:object) do
fog_connection.directories.new(key: 'artifacts').files.create(
key: 'tmp/uploads/12312300',
body: 'content'
)
end
let(:file_upload) { fog_to_uploaded_file(object) }
upload_artifacts(file_upload, headers_with_token,
{ 'file.remote_id' => remote_id })
before do
upload_artifacts(file_upload, headers_with_token, 'file.remote_id' => remote_id)
end
context 'when valid remote_id is used' do
......@@ -1804,12 +1806,13 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
end
def upload_artifacts(file, headers = {}, params = {})
params = params.merge({
'file.path' => file.path,
'file.name' => file.original_filename
})
post api("/jobs/#{job.id}/artifacts"), params: params, headers: headers
workhorse_finalize(
api("/jobs/#{job.id}/artifacts"),
method: :post,
file_key: :file,
params: params.merge(file: file),
headers: headers
)
end
end
......
......@@ -4,6 +4,7 @@ require 'spec_helper'
describe 'Git LFS API and storage' do
include LfsHttpHelpers
include ProjectForksHelper
include WorkhorseHelpers
set(:project) { create(:project, :repository) }
set(:other_project) { create(:project, :repository) }
......@@ -933,7 +934,7 @@ describe 'Git LFS API and storage' do
it_behaves_like 'a valid response' do
it 'responds with status 200, location of LFS remote store and object details' do
expect(json_response['TempPath']).to eq(LfsObjectUploader.workhorse_local_upload_path)
expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL')
......@@ -992,10 +993,17 @@ describe 'Git LFS API and storage' do
stub_lfs_object_storage(direct_upload: true)
end
let(:tmp_object) do
fog_connection.directories.new(key: 'lfs-objects').files.create(
key: 'tmp/uploads/12312300',
body: 'content'
)
end
['123123', '../../123123'].each do |remote_id|
context "with invalid remote_id: #{remote_id}" do
subject do
put_finalize(with_tempfile: true, args: {
put_finalize(remote_object: tmp_object, args: {
'file.remote_id' => remote_id
})
end
......@@ -1009,15 +1017,8 @@ describe 'Git LFS API and storage' do
end
context 'with valid remote_id' do
before do
fog_connection.directories.new(key: 'lfs-objects').files.create(
key: 'tmp/uploads/12312300',
body: 'content'
)
end
subject do
put_finalize(with_tempfile: true, args: {
put_finalize(remote_object: tmp_object, args: {
'file.remote_id' => '12312300',
'file.name' => 'name'
})
......@@ -1027,6 +1028,10 @@ describe 'Git LFS API and storage' do
subject
expect(response).to have_gitlab_http_status(200)
object = LfsObject.find_by_oid(sample_oid)
expect(object).to be_present
expect(object.file.read).to eq(tmp_object.body)
end
it 'schedules migration of file to object storage' do
......@@ -1268,28 +1273,31 @@ describe 'Git LFS API and storage' do
put authorize_url(project, sample_oid, sample_size), params: {}, headers: authorize_headers
end
def put_finalize(lfs_tmp = lfs_tmp_file, with_tempfile: false, verified: true, args: {})
def put_finalize(lfs_tmp = lfs_tmp_file, with_tempfile: false, verified: true, remote_object: nil, args: {})
uploaded_file = nil
if with_tempfile
upload_path = LfsObjectUploader.workhorse_local_upload_path
file_path = upload_path + '/' + lfs_tmp if lfs_tmp
if with_tempfile
FileUtils.mkdir_p(upload_path)
FileUtils.touch(file_path)
end
extra_args = {
'file.path' => file_path,
'file.name' => File.basename(file_path)
}
put_finalize_with_args(args.merge(extra_args).compact, verified: verified)
uploaded_file = UploadedFile.new(file_path, filename: File.basename(file_path))
elsif remote_object
uploaded_file = fog_to_uploaded_file(remote_object)
end
def put_finalize_with_args(args, verified:)
finalize_headers = headers
finalize_headers.merge!(workhorse_internal_api_request_header) if verified
put objects_url(project, sample_oid, sample_size), params: args, headers: finalize_headers
workhorse_finalize(
objects_url(project, sample_oid, sample_size),
method: :put,
file_key: :file,
params: args.merge(file: uploaded_file),
headers: finalize_headers
)
end
def lfs_tmp_file
......
......@@ -22,16 +22,40 @@ module WorkhorseHelpers
# workhorse_post_with_file will transform file_key inside params as if it was disk accelerated by workhorse
def workhorse_post_with_file(url, file_key:, params:)
workhorse_request_with_file(:post, url,
file_key: file_key,
params: params,
env: { 'CONTENT_TYPE' => 'multipart/form-data' },
send_rewritten_field: true
)
end
# workhorse_finalize will transform file_key inside params as if it was the finalize call of an inline object storage upload.
# note that based on the content of the params it can simulate a disc acceleration or an object storage upload
def workhorse_finalize(url, method: :post, file_key:, params:, headers: {})
workhorse_request_with_file(method, url,
file_key: file_key,
params: params,
extra_headers: headers,
send_rewritten_field: false
)
end
def workhorse_request_with_file(method, url, file_key:, params:, env: {}, extra_headers: {}, send_rewritten_field:)
workhorse_params = params.dup
file = workhorse_params.delete(file_key)
workhorse_params.merge!(workhorse_disk_accelerated_file_params(file_key, file))
workhorse_params = workhorse_disk_accelerated_file_params(file_key, file).merge(workhorse_params)
post(url,
params: workhorse_params,
headers: workhorse_rewritten_fields_header(file_key => file.path),
env: { 'CONTENT_TYPE' => 'multipart/form-data' }
)
headers = if send_rewritten_field
workhorse_rewritten_fields_header(file_key => file.path)
else
{}
end
headers.merge!(extra_headers)
process(method, url, params: workhorse_params, headers: headers, env: env)
end
private
......@@ -45,9 +69,24 @@ module WorkhorseHelpers
end
def workhorse_disk_accelerated_file_params(key, file)
return {} unless file
{
"#{key}.name" => file.original_filename,
"#{key}.path" => file.path
}
"#{key}.size" => file.size
}.tap do |params|
params["#{key}.path"] = file.path if file.path
params["#{key}.remote_id"] = file.remote_id if file.respond_to?(:remote_id) && file.remote_id
end
end
def fog_to_uploaded_file(file)
filename = File.basename(file.key)
UploadedFile.new(nil,
filename: filename,
remote_id: filename,
size: file.content_length
)
end
end
......@@ -338,7 +338,7 @@ shared_examples 'handle uploads authorize' do
it_behaves_like 'a valid response' do
it 'responds with status 200, location of uploads remote store and object details' do
expect(json_response['TempPath']).to eq(uploader_class.workhorse_local_upload_path)
expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment