Commit 62a2d8cb authored by Mike Kozono's avatar Mike Kozono

Geo: Fix syncing remote stored Blobs

When direct download is used.

`http.rb`'s `follow` method forwards headers from the original request
with the redirect, and it's normal for the remote store to respond 400
Bad Request if it receives an unexpected header like:
`Authorization: Gl-Geo ...`. So here we follow redirects manually.

Also, copy this timeout fix at the same time since it's in the same
lines of code:
https://gitlab.com/gitlab-org/gitlab/-/commit/15802cc45dfa88bda5b6b388a7e8fd16f450c917?merge_request_iid=36012
parent 3d59c8a7
---
title: 'Geo: Fix syncing direct download remote stored Blob types'
merge_request: 55775
author:
type: fixed
...@@ -4,7 +4,12 @@ module Gitlab ...@@ -4,7 +4,12 @@ module Gitlab
module Geo module Geo
module Replication module Replication
class BlobDownloader class BlobDownloader
TEMP_PREFIX = 'tmp_'.freeze TEMP_PREFIX = 'tmp_'
DOWNLOAD_TIMEOUT = {
connect: 60,
write: 60,
read: 60
}.freeze
attr_reader :replicator attr_reader :replicator
...@@ -143,7 +148,17 @@ module Gitlab ...@@ -143,7 +148,17 @@ module Gitlab
file_size = -1 file_size = -1
# Make the request # Make the request
response = ::HTTP.follow.get(url, headers: req_headers) response = ::HTTP.timeout(DOWNLOAD_TIMEOUT.dup).get(url, headers: req_headers)
if response.status.redirect?
# ::HTTP.follow passes through all headers (including our
# `Authorization: Gl-Geo ...` header) when the primary uses object
# storage with direct download.
# https://gitlab.com/gitlab-org/gitlab/-/issues/323495
#
# So we manually follow the redirect instead.
response = ::HTTP.timeout(DOWNLOAD_TIMEOUT.dup).get(response['Location'])
end
# Check for failures # Check for failures
unless response.status.success? unless response.status.success?
......
...@@ -54,23 +54,70 @@ RSpec.describe Gitlab::Geo::Replication::BlobDownloader do ...@@ -54,23 +54,70 @@ RSpec.describe Gitlab::Geo::Replication::BlobDownloader do
end end
context 'when the file is on Object Storage' do context 'when the file is on Object Storage' do
let!(:secondary_object_storage) { create(:geo_node, sync_object_storage: sync_object_storage) }
before do before do
stub_package_file_object_storage(enabled: true, direct_upload: true) stub_package_file_object_storage(enabled: true, direct_upload: true)
stub_current_geo_node(secondary_object_storage)
end end
let!(:model_record) { create(:package_file, :npm, :object_storage) } let!(:model_record) { create(:package_file, :npm, :object_storage) }
subject { described_class.new(replicator: model_record.replicator) } subject { described_class.new(replicator: model_record.replicator) }
context 'with object storage sync disabled' do context 'with object storage sync enabled' do
let(:sync_object_storage) { true }
context 'when the primary proxies remote storage' do
it 'returns success' do
content = replicator.carrierwave_uploader.file.read
size = content.bytesize
stub_request(:get, subject.resource_url)
.to_return(status: 200, body: content)
result = subject.execute
expect_blob_downloader_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
end
end
context 'when the primary redirects to remote storage' do
let(:geo_internal_headers) { { 'Authorization' => 'Gl-Geo: abc123' } }
let(:content) { replicator.carrierwave_uploader.file.read }
let(:size) { content.bytesize }
let(:remote_url) { replicator.carrierwave_uploader.url }
before do before do
secondary.update_column(:sync_object_storage, false) # Set up to ensure that our redirect follow implementation does
# not pass through all headers.
allow_next_instance_of(Gitlab::Geo::TransferRequest) do |request|
allow(request).to receive(:headers).and_return(geo_internal_headers)
end end
it 'returns failure' do stub_request(:get, subject.resource_url)
.to_return(status: 302, headers: { 'Location' => remote_url })
# This stub is intended to cause this test to fail when all
# headers are passed through (per HTTP.rb `follow` behavior) to
# the redirect location.
#
# Some S3-compatible storages respond with 400 Bad Request when
# there are unexpected headers. See
# https://gitlab.com/gitlab-org/gitlab/-/issues/201995
stub_request(:get, remote_url)
.to_return(status: 400, body: content, headers: geo_internal_headers)
stub_request(:get, remote_url)
.to_return(status: 200, body: content)
end
it 'returns success', :aggregate_failures do
result = subject.execute result = subject.execute
expect(result.success).to be_falsey expect_blob_downloader_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
# Expect that the redirect is followed
expect(WebMock).to have_requested(:get, remote_url)
end end
end end
...@@ -86,6 +133,17 @@ RSpec.describe Gitlab::Geo::Replication::BlobDownloader do ...@@ -86,6 +133,17 @@ RSpec.describe Gitlab::Geo::Replication::BlobDownloader do
end end
end end
end end
context 'with object storage sync disabled' do
let(:sync_object_storage) { false }
it 'returns failure' do
result = subject.execute
expect(result.success).to be_falsey
end
end
end
end end
context 'when an error occurs while getting a Tempfile' do context 'when an error occurs while getting a Tempfile' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment