Commit 479fa52d authored by Douglas Barbosa Alexandre

Merge branch '202035-geo-http-clone-pull-redirect-to-primary-redirect' into 'master'

Geo: Support HTTP git operations for repos that are not yet replicated

Closes #202035

See merge request gitlab-org/gitlab!27072
parents 4aad18a1 22659d87
......@@ -5,22 +5,33 @@ module EE
module GitHttpClientController
extend ActiveSupport::Concern
# This module is responsible for determining if an incoming secondary bound
# HTTP request should be redirected to the primary.
# This module is responsible for determining if an incoming Geo secondary
# bound HTTP request should be redirected to the Primary.
#
# Why? A secondary is not allowed to perform any write actions, so any
# request of this type need to be sent through to the primary. By
# request of this type needs to be sent through to the Primary. By
# redirecting within code, we allow clients to git pull/push using their
# secondary git remote without needing an additional primary remote.
#
# The method for redirection *must* happen as early as possible in the
# request. For example, putting the redirection logic in #access_check
# will not work because the git client will not accept a 302 in response
# to verifying credentials.
#
# Current secondary HTTP requests to redirect: -
#
# * git pull (repository is not replicated)
# * GET /namespace/repo.git/info/refs?service=git-upload-pack
#
# * git lfs pull (repository is not replicated)
# * GET /namespace/repo.git/gitlab-lfs/objects/<oid>
#
# * git push
# * GET /repo.git/info/refs?service=git-receive-pack
# * POST /repo.git/git-receive-pack
# * GET /namespace/repo.git/info/refs?service=git-receive-pack
# * POST /namespace/repo.git/git-receive-pack
#
# * git lfs push (usually happens automatically as part of a `git push`)
# * POST /repo.git/info/lfs/objects/batch (and we examine
# * POST /namespace/repo.git/info/lfs/objects/batch (and we examine
# params[:operation] to ensure we're dealing with an upload request)
#
# For more detail, see the following links:
......@@ -30,13 +41,13 @@ module EE
#
prepended do
prepend_before_action do
redirect_to(primary_full_url) if redirect?
redirect_to(geo_primary_full_url) if geo_redirect?
end
end
private
class RouteHelper
class GeoRouteHelper
attr_reader :controller_name, :action_name
CONTROLLER_AND_ACTIONS_TO_REDIRECT = {
......@@ -44,7 +55,8 @@ module EE
'lfs_locks_api' => %w{create unlock verify}
}.freeze
def initialize(controller_name, action_name, service)
def initialize(project, controller_name, action_name, service)
@project = project
@controller_name = controller_name
@action_name = action_name
@service = service
......@@ -56,12 +68,20 @@ module EE
def redirect?
!!CONTROLLER_AND_ACTIONS_TO_REDIRECT[controller_name]&.include?(action_name) ||
git_receive_pack_request?
git_receive_pack_request? ||
redirect_to_avoid_enumeration? ||
not_yet_replicated_redirect?
end
# Whether this request is a git-upload-pack request for a project whose
# repository the Geo project registry does not report as replicated.
#
# Returns false when no project is set; the registry is only consulted
# once the request has been identified as an upload-pack request.
def not_yet_replicated_redirect?
  if project && git_upload_pack_request?
    !::Geo::ProjectRegistry.repository_replicated_for?(project.id)
  else
    false
  end
end
private
attr_reader :service
attr_reader :project, :service
# Examples:
#
......@@ -83,6 +103,14 @@ module EE
service_or_action_name == 'git-receive-pack'
end
# Matches:
#
# GET /repo.git/info/refs?service=git-upload-pack
#
# Returns true when the resolved service (or action) name is exactly
# 'git-upload-pack'.
def git_upload_pack_request?
service_or_action_name == 'git-upload-pack'
end
# Matches:
#
# GET /repo.git/info/refs
......@@ -90,13 +118,24 @@ module EE
# Returns true when the controller action handling the request is
# 'info_refs'.
def info_refs_request?
action_name == 'info_refs'
end
# The purpose of the #redirect_to_avoid_enumeration? method is to avoid
# a scenario where an authenticated user uses the HTTP responses as a
# way of enumerating private projects. Without this check, an attacker
# could determine if a project exists or not by looking at the initial
# HTTP response code for 401 (doesn't exist) vs 302 (exists).
#
# Returns true when no project is set on this helper.
def redirect_to_avoid_enumeration?
project.nil?
end
end
class GitLFSHelper
class GeoGitLFSHelper
MINIMUM_GIT_LFS_VERSION = '2.4.2'.freeze
def initialize(route_helper, operation, current_version)
@route_helper = route_helper
def initialize(project, geo_route_helper, operation, current_version)
@project = project
@geo_route_helper = geo_route_helper
@operation = operation
@current_version = current_version
end
......@@ -110,8 +149,8 @@ module EE
end
def redirect?
return false unless route_helper.match?('lfs_api', 'batch')
return true if upload?
return true if batch_upload?
return true if not_yet_replicated_redirect?
false
end
......@@ -124,15 +163,33 @@ module EE
private
attr_reader :route_helper, :operation, :current_version
attr_reader :project, :geo_route_helper, :operation, :current_version
# Returns the translated notice asking the user to upgrade git-lfs, with
# MINIMUM_GIT_LFS_VERSION interpolated into the template.
def incorrect_version_message
  format(
    _("You need git-lfs version %{min_git_lfs_version} (or greater) to continue. Please visit https://git-lfs.github.com"),
    min_git_lfs_version: MINIMUM_GIT_LFS_VERSION
  )
end
def upload?
operation == 'upload'
# Asks the route helper whether the request matches the 'lfs_api'
# controller and its 'batch' action.
def batch_request?
geo_route_helper.match?('lfs_api', 'batch')
end
# A batch request whose operation is 'upload'.
def batch_upload?
batch_request? && operation == 'upload'
end
# A batch request whose operation is 'download'.
def batch_download?
batch_request? && operation == 'download'
end
# Asks the route helper whether the request matches the 'lfs_storage'
# controller and its 'download' action.
def transfer_download?
geo_route_helper.match?('lfs_storage', 'download')
end
# Whether this is an LFS download (batch download or object transfer) for a
# project whose repository the Geo project registry does not report as
# replicated. Returns false when no project is set.
def not_yet_replicated_redirect?
  return false unless project

  download_request = batch_download? || transfer_download?
  download_request && !::Geo::ProjectRegistry.repository_replicated_for?(project.id)
end
def wanted_version
......@@ -140,52 +197,51 @@ module EE
end
end
def route_helper
@route_helper ||= RouteHelper.new(controller_name, action_name, params[:service])
def geo_route_helper
@geo_route_helper ||= GeoRouteHelper.new(project, controller_name, action_name, params[:service])
end
def git_lfs_helper
def geo_git_lfs_helper
# params[:operation] explained: https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md#requests
@git_lfs_helper ||= GitLFSHelper.new(route_helper, params[:operation], request.headers['User-Agent'])
@geo_git_lfs_helper ||= GeoGitLFSHelper.new(project, geo_route_helper, params[:operation], request.headers['User-Agent'])
end
def request_fullpath_for_primary
def geo_request_fullpath_for_primary
relative_url_root = ::Gitlab.config.gitlab.relative_url_root.chomp('/')
request.fullpath.sub(relative_url_root, '')
end
def primary_full_url
path = File.join(secondary_referrer_path_prefix, request_fullpath_for_primary)
def geo_primary_full_url
path = if geo_route_helper.not_yet_replicated_redirect?
# git clone/pull
geo_request_fullpath_for_primary
else
# git push
File.join(geo_secondary_referrer_path_prefix, geo_request_fullpath_for_primary)
end
::Gitlab::Utils.append_path(::Gitlab::Geo.primary_node.internal_url, path)
end
def secondary_referrer_path_prefix
def geo_secondary_referrer_path_prefix
File.join(::Gitlab::Geo::GitPushHttp::PATH_PREFIX, ::Gitlab::Geo.current_node.id.to_s)
end
def redirect?
# Don't redirect if we're not a secondary with a primary
def geo_redirect?
return false unless ::Gitlab::Geo.secondary_with_primary?
return true if geo_route_helper.redirect?
# Redirect as the request matches RouteHelper::CONTROLLER_AND_ACTIONS_TO_REDIRECT
return true if route_helper.redirect?
# Look to redirect, as we're an LFS batch upload request
if git_lfs_helper.redirect?
# Redirect as git-lfs version is at least 2.4.2
return true if git_lfs_helper.version_ok?
if geo_git_lfs_helper.redirect?
return true if geo_git_lfs_helper.version_ok?
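# git-lfs 2.4.2 is really only required for requests that involve
# redirection, so we only render the error if the LFS request would
# otherwise be redirected (a batch upload, or a download for a repository
# that is not yet replicated)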
#
render(git_lfs_helper.incorrect_version_response)
render(geo_git_lfs_helper.incorrect_version_response)
# Don't redirect
return false
end
# Don't redirect
false
end
end
......
......@@ -209,6 +209,12 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(id: start..finish)
end
# Whether the repository of the given project counts as replicated here.
#
# Anything other than a Geo secondary with a primary always answers true.
# Otherwise the answer is whether a registry row exists for the project
# with a non-null last_repository_successful_sync_at.
#
# @param project_id [Integer] value matched against the project_id column
# @return [Boolean]
def self.repository_replicated_for?(project_id)
  if ::Gitlab::Geo.secondary_with_primary?
    where(project_id: project_id)
      .where.not(last_repository_successful_sync_at: nil)
      .exists?
  else
    true
  end
end
# Must be run before fetching the repository to avoid a race condition
#
# @param [String] type must be one of the values in TYPES
......@@ -432,6 +438,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
:synced
end
# True once last_repository_successful_sync_at holds a non-blank value.
def repository_has_successfully_synced?
  !last_repository_successful_sync_at.blank?
end
private
# Whether any operation has ever been attempted
......
---
title: Geo - Support git clone/pull operations for repositories that are not yet replicated
merge_request: 27072
author:
type: added
......@@ -8,6 +8,7 @@ module EE
prepended do
helpers do
include ::Gitlab::Utils::StrongMemoize
extend ::Gitlab::Utils::Override
override :lfs_authentication_url
......@@ -17,23 +18,42 @@ module EE
override :ee_post_receive_response_hook
def ee_post_receive_response_hook(response)
response.add_basic_message(geo_secondary_lag_message) if ::Gitlab::Geo.primary?
response.add_basic_message(geo_redirect_to_primary_message) if display_geo_redirect_to_primary_message?
response.add_basic_message(geo_secondary_lag_message) if geo_display_secondary_lag_message?
end
def geo_secondary_lag_message
lag = current_replication_lag
return if lag.to_i <= 0
# True only on a Geo primary when the current replication lag, coerced
# with #to_i (so a nil lag counts as 0), is greater than zero.
def geo_display_secondary_lag_message?
::Gitlab::Geo.primary? && geo_current_replication_lag.to_i > 0
end
"Current replication lag: #{lag} seconds"
# Formats the current replication lag into the message string. It does not
# check the lag itself; geo_display_secondary_lag_message? does that.
def geo_secondary_lag_message
"Current replication lag: #{geo_current_replication_lag} seconds"
end
def current_replication_lag
fetch_geo_node_referrer&.status&.db_replication_lag_seconds
# db_replication_lag_seconds taken from the referred node's status,
# memoized with strong_memoize. Safe navigation yields nil when the node
# or its status is missing.
def geo_current_replication_lag
strong_memoize(:geo_current_replication_lag) do
geo_referred_node&.status&.db_replication_lag_seconds
end
end
def fetch_geo_node_referrer
# The referrer node fetched through Gitlab::Geo::GitPushHttp, built from
# params[:identifier] and params[:gl_repository]; memoized with
# strong_memoize.
def geo_referred_node
strong_memoize(:geo_referred_node) do
::Gitlab::Geo::GitPushHttp.new(params[:identifier], params[:gl_repository]).fetch_referrer_node
end
end
# Truthy only on a Geo primary when a redirect message is available.
# Because of `&&`, the truthy value is the message itself rather than a
# strict boolean.
def display_geo_redirect_to_primary_message?
::Gitlab::Geo.primary? && geo_redirect_to_primary_message
end
# Message telling the pusher that the push was redirected to the primary.
#
# Returns nil when there is no referred node. Otherwise the message is
# built from the current node's URL (trailing slash removed) and the
# project's full path, and cached in an instance variable.
def geo_redirect_to_primary_message
  return unless geo_referred_node

  @geo_redirect_to_primary_message ||= ::Gitlab::Geo.redirecting_push_to_primary_message(
    "#{::Gitlab::Geo.current_node.url.chomp('/')}/#{project.full_path}.git"
  )
end
override :check_allowed
def check_allowed(params)
......
......@@ -40,7 +40,7 @@ module EE
end
def messages
messages = proxying_to_primary_message
messages = ::Gitlab::Geo.proxying_push_to_primary_message(primary_ssh_url_to_repo).split("\n")
lag_message = current_replication_lag_message
return messages unless lag_message
......@@ -75,23 +75,6 @@ module EE
geo_primary_ssh_url_to_repo(project_or_wiki)
end
# Returns the "proxying to primary" notice as an array of lines.
#
# The text is kept short so that it fits the console 'box' git prints for
# remote messages, each line being prefixed with "remote:" by the client:
#
#   remote: You're pushing to a Geo secondary! We'll help you by proxying this
#   remote: request to the primary:
#   remote: ssh://<user>@<host>:<port>/<group>/<repo>.git
def proxying_to_primary_message
  notice = <<~STR
    You're pushing to a Geo secondary! We'll help you by proxying this
    request to the primary:
    #{primary_ssh_url_to_repo}
  STR

  notice.split("\n")
end
def current_replication_lag_message
return if ::Gitlab::Database.read_write? || current_replication_lag.zero?
......
......@@ -136,5 +136,34 @@ module Gitlab
Gitlab::CIDR.new(allowed_ips).match?(ip)
end
# Builds the push-to-primary notice for the given URL using the
# 'proxying' action word.
def self.proxying_push_to_primary_message(url)
push_to_primary_message(url, 'proxying')
end
# Builds the push-to-primary notice for the given URL using the
# 'redirecting' action word.
def self.redirecting_push_to_primary_message(url)
push_to_primary_message(url, 'redirecting')
end
# Builds the notice shown to a user who pushed to a Geo secondary.
#
# Both the template and the action word are passed through `_()` before
# the action and URL are substituted. Returns nil when either argument is
# nil or false.
#
# The wording is kept short so that it fits the console 'box' git prints
# for remote messages, e.g.
#
#   remote: You're pushing to a Geo secondary! We'll help you by <action> this
#   remote: request to the primary:
#   remote: <url>
#
# @param url [String] URL substituted for %{url}
# @param action [String] word substituted for %{action}
# @return [String, nil]
def self.push_to_primary_message(url, action)
  return unless action && url

  message_template = <<~STR
    You're pushing to a Geo secondary! We'll help you by %{action} this
    request to the primary:
    %{url}
  STR

  format(_(message_template), action: _(action), url: url)
end
end
end
......@@ -307,4 +307,32 @@ describe Gitlab::Geo, :geo, :request_store do
end
end
end
# The describe label names the class method actually under test
# (.proxying_push_to_primary_message).
describe '.proxying_push_to_primary_message' do
  it 'returns a message as a string' do
    url = 'ssh://git@primary.com/namespace/repo.git'
    message = <<~STR
      You're pushing to a Geo secondary! We'll help you by proxying this
      request to the primary:
      #{url}
    STR

    expect(described_class.proxying_push_to_primary_message(url)).to eq(message)
  end
end
# The describe label names the class method actually under test
# (.redirecting_push_to_primary_message).
describe '.redirecting_push_to_primary_message' do
  it 'returns a message as a string' do
    url = 'http://primary.com/namespace/repo.git'
    message = <<~STR
      You're pushing to a Geo secondary! We'll help you by redirecting this
      request to the primary:
      #{url}
    STR

    expect(described_class.redirecting_push_to_primary_message(url)).to eq(message)
  end
end
end
......@@ -3,6 +3,7 @@
require 'spec_helper'
describe Geo::ProjectRegistry do
include ::EE::GeoHelpers
using RSpec::Parameterized::TableSyntax
set(:project) { create(:project, description: 'kitten mittens') }
......@@ -210,6 +211,72 @@ describe Geo::ProjectRegistry do
end
end
# Covers .repository_replicated_for? across node configurations:
#
# * no Geo node stubbed, a primary, or a secondary without a configured
#   primary: expected truthy
# * secondary with a primary: expected falsey when no registry entry exists
#   or the entry is created with the :repository_sync_failed trait, and
#   truthy when the entry is created with the :synced trait
describe '.repository_replicated_for?' do
context 'for a non-Geo setup' do
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
end
end
context 'for a Geo setup' do
before do
stub_current_geo_node(current_node)
end
context 'for a Geo Primary' do
let(:current_node) { create(:geo_node, :primary) }
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
end
end
context 'for a Geo secondary' do
let(:current_node) { create(:geo_node) }
context 'where Primary node is not configured' do
it 'returns true' do
expect(described_class.repository_replicated_for?(project.id)).to be_truthy
end
end
context 'where Primary node is configured' do
before do
create(:geo_node, :primary)
end
context 'where project_registry entry does not exist' do
it 'returns false' do
project_without_registry = create(:project)
expect(described_class.repository_replicated_for?(project_without_registry.id)).to be_falsey
end
end
context 'where project_registry entry does exist' do
context 'where last_repository_successful_sync_at is not set' do
it 'returns false' do
project_with_failed_registry = create(:project)
create(:geo_project_registry, :repository_sync_failed, project: project_with_failed_registry)
expect(described_class.repository_replicated_for?(project_with_failed_registry.id)).to be_falsey
end
end
context 'where last_repository_successful_sync_at is set' do
it 'returns true' do
project_with_synced_registry = create(:project)
create(:geo_project_registry, :synced, project: project_with_synced_registry)
expect(described_class.repository_replicated_for?(project_with_synced_registry.id)).to be_truthy
end
end
end
end
end
end
end
describe '#repository_sync_due?' do
where(:last_synced_at, :resync, :retry_at, :expected) do
now = Time.now
......@@ -1014,4 +1081,22 @@ describe Geo::ProjectRegistry do
expect(registry.synchronization_state).to eq(:synced)
end
end
# Instance-method specs are labelled with a leading '#', matching the
# sibling `describe '#repository_sync_due?'` group in this file.
describe '#repository_has_successfully_synced?' do
  context 'when repository has never successfully synced' do
    it 'returns false' do
      registry = create(:geo_project_registry, last_repository_successful_sync_at: nil)

      expect(registry.repository_has_successfully_synced?).to be_falsey
    end
  end

  context 'when repository has successfully synced' do
    it 'returns true' do
      registry = create(:geo_project_registry, last_repository_successful_sync_at: Time.now)

      expect(registry.repository_has_successfully_synced?).to be_truthy
    end
  end
end
end
......@@ -4,8 +4,10 @@ require 'spec_helper'
describe API::Internal::Base do
include EE::GeoHelpers
let_it_be(:primary_node, reload: true) { create(:geo_node, :primary) }
let_it_be(:secondary_node, reload: true) { create(:geo_node) }
let_it_be(:primary_url) { 'http://primary.example.com' }
let_it_be(:secondary_url) { 'http://secondary.example.com' }
let_it_be(:primary_node, reload: true) { create(:geo_node, :primary, url: primary_url) }
let_it_be(:secondary_node, reload: true) { create(:geo_node, url: secondary_url) }
describe 'POST /internal/post_receive', :geo do
let_it_be(:user) { create(:user) }
......@@ -96,6 +98,23 @@ describe API::Internal::Base do
expect(json_response['messages']).not_to include({ 'message' => a_string_matching('replication lag'), 'type' => anything })
end
end
# The expected URL is derived from the shared `primary_url` fixture rather
# than repeating the host literal, so the expectation follows the fixture.
it 'includes a message advising a redirection occurred' do
  redirect_message = <<~STR
    You're pushing to a Geo secondary! We'll help you by redirecting this
    request to the primary:
    #{primary_url}/#{project.full_path}.git
  STR

  post api('/internal/post_receive'), params: valid_params

  expect(response).to have_gitlab_http_status(:ok)
  expect(json_response['messages']).to include({
    'type' => 'basic',
    'message' => redirect_message
  })
end
end
context 'when the push was not redirected from a Geo secondary to the primary' do
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment