Commit 95870df2 authored by Stan Hu's avatar Stan Hu

Consolidate object storage config in one place

Currently each object type has its own section in gitlab.yml. For
example:

```
  artifacts:
    path: tmp/tests/artifacts
    enabled: true
    object_store:
      enabled: false
      remote_directory: artifacts # The bucket name
      background_upload: false
      connection:
        provider: AWS
        aws_access_key_id: AWS_ACCESS_KEY_ID
        aws_secret_access_key: AWS_SECRET_ACCESS_KEY
        region: us-east-1
```

External diffs, LFS, uploads, packages, etc. all have similar
independent configuration object storage sections. While this redundancy
makes it possible to configure each bucket with different providers or
credentials, this causes a configuration explosion that makes GitLab
hard to manage.

This change preserves the legacy format but adds a new `gitlab.yml`
section that enforces a single, common object storage provider for all
object storage types. This will make it possible for the S3 client in
Workhorse to operate with one credential and simplify the configuration
for the end user. An example config:

```
  object_store:
    enabled: true
    connection:
      provider: AWS
      aws_access_key_id: AWS_ACCESS_KEY_ID
      aws_secret_access_key: AWS_SECRET_ACCESS_KEY
      region: us-east-1
    proxy_download: true
    objects:
      artifacts:
        bucket: artifacts
        proxy_download: false
      external_diffs:
        bucket: external-diffs
      lfs:
        bucket: lfs-objects
      uploads:
        bucket: uploads
      packages:
        bucket: packages
      dependency_proxy:
        bucket: dependency_proxy
```

Note that:

1. The consolidated config only gets used if `object_store` is NOT
   defined within the types themselves.
2. A bucket needs to be defined for each object type.
3. Only bucket, enabled, and proxy_download can be overridden from the
common configuration.

Consolidating support for a single bucket for all types is a larger and
more involved change.

First step of https://gitlab.com/gitlab-org/gitlab/-/issues/23345
parent da918859
---
title: Consolidate object storage config in one place
merge_request: 34460
author:
type: changed
......@@ -205,6 +205,34 @@ production: &base
# Whether to expunge (permanently remove) messages from the mailbox when they are deleted after delivery
expunge_deleted: false
## Consolidated object store config
## This will only take effect if the object_store sections are not defined
## within the types (e.g. artifacts, lfs, etc.).
# object_store:
# enabled: false
# remote_directory: artifacts # The bucket name
# proxy_download: false # Passthrough all downloads via GitLab instead of using Redirects to Object Storage
# connection:
# provider: AWS # Only AWS supported at the moment
# aws_access_key_id: AWS_ACCESS_KEY_ID
# aws_secret_access_key: AWS_SECRET_ACCESS_KEY
# region: us-east-1
# aws_signature_version: 4 # For creation of signed URLs. Set to 2 if provider does not support v4.
# endpoint: 'https://s3.amazonaws.com' # default: nil - Useful for S3 compliant services such as DigitalOcean Spaces
# objects:
# artifacts:
# bucket: artifacts
# external_diffs:
# bucket: external-diffs
# lfs:
# bucket: lfs-objects
# uploads:
# bucket: uploads
# packages:
# bucket: packages
# dependency_proxy:
# bucket: dependency_proxy
## Build Artifacts
artifacts:
enabled: true
......
......@@ -254,7 +254,7 @@ Settings.artifacts['storage_path'] = Settings.absolute(Settings.artifacts.values
# Settings.artifact['path'] is deprecated, use `storage_path` instead
Settings.artifacts['path'] = Settings.artifacts['storage_path']
Settings.artifacts['max_size'] ||= 100 # in megabytes
Settings.artifacts['object_store'] = ObjectStoreSettings.parse(Settings.artifacts['object_store'])
Settings.artifacts['object_store'] = ObjectStoreSettings.legacy_parse(Settings.artifacts['object_store'])
#
# Registry
......@@ -325,7 +325,7 @@ Settings['external_diffs'] ||= Settingslogic.new({})
Settings.external_diffs['enabled'] = false if Settings.external_diffs['enabled'].nil?
Settings.external_diffs['when'] = 'always' if Settings.external_diffs['when'].nil?
Settings.external_diffs['storage_path'] = Settings.absolute(Settings.external_diffs['storage_path'] || File.join(Settings.shared['path'], 'external-diffs'))
Settings.external_diffs['object_store'] = ObjectStoreSettings.parse(Settings.external_diffs['object_store'])
Settings.external_diffs['object_store'] = ObjectStoreSettings.legacy_parse(Settings.external_diffs['object_store'])
#
# Git LFS
......@@ -333,7 +333,7 @@ Settings.external_diffs['object_store'] = ObjectStoreSettings.parse(Settings.ext
Settings['lfs'] ||= Settingslogic.new({})
Settings.lfs['enabled'] = true if Settings.lfs['enabled'].nil?
Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] || File.join(Settings.shared['path'], "lfs-objects"))
Settings.lfs['object_store'] = ObjectStoreSettings.parse(Settings.lfs['object_store'])
Settings.lfs['object_store'] = ObjectStoreSettings.legacy_parse(Settings.lfs['object_store'])
#
# Uploads
......@@ -341,7 +341,7 @@ Settings.lfs['object_store'] = ObjectStoreSettings.parse(Settings.lfs['object_st
Settings['uploads'] ||= Settingslogic.new({})
Settings.uploads['storage_path'] = Settings.absolute(Settings.uploads['storage_path'] || 'public')
Settings.uploads['base_dir'] = Settings.uploads['base_dir'] || 'uploads/-/system'
Settings.uploads['object_store'] = ObjectStoreSettings.parse(Settings.uploads['object_store'])
Settings.uploads['object_store'] = ObjectStoreSettings.legacy_parse(Settings.uploads['object_store'])
Settings.uploads['object_store']['remote_directory'] ||= 'uploads'
#
......@@ -351,7 +351,7 @@ Gitlab.ee do
Settings['packages'] ||= Settingslogic.new({})
Settings.packages['enabled'] = true if Settings.packages['enabled'].nil?
Settings.packages['storage_path'] = Settings.absolute(Settings.packages['storage_path'] || File.join(Settings.shared['path'], "packages"))
Settings.packages['object_store'] = ObjectStoreSettings.parse(Settings.packages['object_store'])
Settings.packages['object_store'] = ObjectStoreSettings.legacy_parse(Settings.packages['object_store'])
end
#
......@@ -361,7 +361,7 @@ Gitlab.ee do
Settings['dependency_proxy'] ||= Settingslogic.new({})
Settings.dependency_proxy['enabled'] = true if Settings.dependency_proxy['enabled'].nil?
Settings.dependency_proxy['storage_path'] = Settings.absolute(Settings.dependency_proxy['storage_path'] || File.join(Settings.shared['path'], "dependency_proxy"))
Settings.dependency_proxy['object_store'] = ObjectStoreSettings.parse(Settings.dependency_proxy['object_store'])
Settings.dependency_proxy['object_store'] = ObjectStoreSettings.legacy_parse(Settings.dependency_proxy['object_store'])
# For first iteration dependency proxy uses Rails server to download blobs.
# To ensure acceptable performance we only allow feature to be used with
......@@ -376,7 +376,7 @@ end
Settings['terraform_state'] ||= Settingslogic.new({})
Settings.terraform_state['enabled'] = true if Settings.terraform_state['enabled'].nil?
Settings.terraform_state['storage_path'] = Settings.absolute(Settings.terraform_state['storage_path'] || File.join(Settings.shared['path'], "terraform_state"))
Settings.terraform_state['object_store'] = ObjectStoreSettings.parse(Settings.terraform_state['object_store'])
Settings.terraform_state['object_store'] = ObjectStoreSettings.legacy_parse(Settings.terraform_state['object_store'])
#
# Mattermost
......@@ -595,6 +595,9 @@ Settings.gitlab_shell['owner_group'] ||= Settings.gitlab.user
Settings.gitlab_shell['ssh_path_prefix'] ||= Settings.__send__(:build_gitlab_shell_ssh_path_prefix)
Settings.gitlab_shell['git_timeout'] ||= 10800
# Object storage
ObjectStoreSettings.new(Settings).parse!
#
# Workhorse
#
......
# Set default values for object_store settings
class ObjectStoreSettings
def self.parse(object_store)
SUPPORTED_TYPES = %w(artifacts external_diffs lfs uploads packages dependency_proxy terraform_state).freeze
ALLOWED_OBJECT_STORE_OVERRIDES = %w(bucket enabled proxy_download).freeze
attr_accessor :settings
# Legacy parser
def self.legacy_parse(object_store)
object_store ||= Settingslogic.new({})
object_store['enabled'] = false if object_store['enabled'].nil?
object_store['remote_directory'] ||= nil
......@@ -12,4 +18,126 @@ class ObjectStoreSettings
object_store['connection']&.deep_stringify_keys!
object_store
end
def initialize(settings)
@settings = settings
end
# This method converts the common object storage settings to
# the legacy, internal representation.
#
# For example, with the folowing YAML:
#
# object_store:
# enabled: true
# connection:
# provider: AWS
# aws_access_key_id: minio
# aws_secret_access_key: gdk-minio
# region: gdk
# endpoint: 'http://127.0.0.1:9000'
# path_style: true
# proxy_download: true
# objects:
# artifacts:
# bucket: artifacts
# proxy_download: false
# lfs:
# bucket: lfs-objects
#
# This method then will essentially call:
#
# Settings.artifacts['object_store'] = {
# "enabled" => true,
# "connection"=> {
# "provider" => "AWS",
# "aws_access_key_id" => "minio",
# "aws_secret_access_key" => "gdk-minio",
# "region" => "gdk",
# "endpoint" => "http://127.0.0.1:9000",
# "path_style" => true
# },
# "direct_upload" => true,
# "background_upload" => false,
# "proxy_download" => false,
# "remote_directory" => "artifacts"
# }
#
# Settings.lfs['object_store'] = {
# "enabled" => true,
# "connection" => {
# "provider" => "AWS",
# "aws_access_key_id" => "minio",
# "aws_secret_access_key" => "gdk-minio",
# "region" => "gdk",
# "endpoint" => "http://127.0.0.1:9000",
# "path_style" => true
# },
# "direct_upload" => true,
# "background_upload" => false,
# "proxy_download" => true,
# "remote_directory" => "lfs-objects"
# }
#
# Note that with the common config:
# 1. Only one object store credentials can now be used. This is
# necessary to limit configuration overhead when an object storage
# client (e.g. AWS S3) is used inside GitLab Workhorse.
# 2. However, a bucket has to be specified for each object
# type. Reusing buckets is not really supported, but we don't
# enforce that yet.
# 3. direct_upload and background_upload cannot be configured anymore.
def parse!
return unless use_consolidated_settings?
main_config = settings['object_store']
common_config = main_config.slice('enabled', 'connection', 'proxy_download')
# Convert connection settings to use string keys, to make Fog happy
common_config['connection']&.deep_stringify_keys!
# These are no longer configurable if common config is used
common_config['direct_upload'] = true
common_config['background_upload'] = false
SUPPORTED_TYPES.each do |store_type|
overrides = main_config.dig('objects', store_type) || {}
target_config = common_config.merge(overrides.slice(*ALLOWED_OBJECT_STORE_OVERRIDES))
section = settings.try(store_type)
next unless section
raise "Object storage for #{store_type} must have a bucket specified" if section['enabled'] && target_config['bucket'].blank?
# Map bucket (external name) -> remote_directory (internal representation)
target_config['remote_directory'] = target_config.delete('bucket')
section['object_store'] = target_config
end
end
private
# We only can use the common object storage settings if:
# 1. The common settings are defined
# 2. The legacy settings are not defined
def use_consolidated_settings?
return false unless settings.dig('object_store', 'enabled')
return false unless settings.dig('object_store', 'connection')
SUPPORTED_TYPES.each do |store|
# to_h is needed because something strange happens to
# Settingslogic#dig when stub_storage_settings is run in tests:
#
# (byebug) section.dig
# *** ArgumentError Exception: wrong number of arguments (given 0, expected 1+)
# (byebug) section.dig('object_store')
# *** ArgumentError Exception: wrong number of arguments (given 1, expected 0)
section = settings.try(store)&.to_h
next unless section
return false if section.dig('object_store', 'enabled')
return false if section.dig('object_store', 'connection')
end
true
end
end
......@@ -4,9 +4,105 @@ require 'spec_helper'
require Rails.root.join('config', 'object_store_settings.rb')
RSpec.describe ObjectStoreSettings do
describe '.parse' do
describe '#parse!' do
let(:settings) { Settingslogic.new(config) }
subject { described_class.new(settings).parse! }
context 'with valid config' do
let(:connection) do
{
'provider' => 'AWS',
'aws_access_key_id' => 'AWS_ACCESS_KEY_ID',
'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY',
'region' => 'us-east-1'
}
end
let(:config) do
{
'lfs' => { 'enabled' => true },
'artifacts' => { 'enabled' => true },
'external_diffs' => { 'enabled' => false },
'object_store' => {
'enabled' => true,
'connection' => connection,
'proxy_download' => true,
'objects' => {
'artifacts' => {
'bucket' => 'artifacts',
'proxy_download' => false
},
'lfs' => {
'bucket' => 'lfs-objects'
},
'external_diffs' => {
'bucket' => 'external_diffs',
'enabled' => false
}
}
}
}
end
it 'sets correct default values' do
subject
expect(settings.artifacts['enabled']).to be true
expect(settings.artifacts['object_store']['enabled']).to be true
expect(settings.artifacts['object_store']['connection']).to eq(connection)
expect(settings.artifacts['object_store']['direct_upload']).to be true
expect(settings.artifacts['object_store']['background_upload']).to be false
expect(settings.artifacts['object_store']['proxy_download']).to be false
expect(settings.artifacts['object_store']['remote_directory']).to eq('artifacts')
expect(settings.lfs['enabled']).to be true
expect(settings.lfs['object_store']['enabled']).to be true
expect(settings.lfs['object_store']['connection']).to eq(connection)
expect(settings.lfs['object_store']['direct_upload']).to be true
expect(settings.lfs['object_store']['background_upload']).to be false
expect(settings.lfs['object_store']['proxy_download']).to be true
expect(settings.lfs['object_store']['remote_directory']).to eq('lfs-objects')
expect(settings.external_diffs['enabled']).to be false
expect(settings.external_diffs['object_store']['enabled']).to be false
expect(settings.external_diffs['object_store']['remote_directory']).to eq('external_diffs')
end
it 'raises an error when a bucket is missing' do
config['object_store']['objects']['lfs'].delete('bucket')
expect { subject }.to raise_error(/Object storage for lfs must have a bucket specified/)
end
context 'with legacy config' do
let(:legacy_settings) do
{
'enabled' => true,
'remote_directory' => 'some-bucket',
'direct_upload' => true,
'background_upload' => false,
'proxy_download' => false
}
end
before do
settings.lfs['object_store'] = described_class.legacy_parse(legacy_settings)
end
it 'does not alter config if legacy settings are specified' do
subject
expect(settings.artifacts['object_store']).to be_nil
expect(settings.lfs['object_store']['remote_directory']).to eq('some-bucket')
expect(settings.external_diffs['object_store']).to be_nil
end
end
end
end
describe '.legacy_parse' do
it 'sets correct default values' do
settings = described_class.parse(nil)
settings = described_class.legacy_parse(nil)
expect(settings['enabled']).to be false
expect(settings['direct_upload']).to be false
......@@ -20,7 +116,7 @@ RSpec.describe ObjectStoreSettings do
'remote_directory' => 'artifacts'
})
settings = described_class.parse(original_settings)
settings = described_class.legacy_parse(original_settings)
expect(settings['enabled']).to be true
expect(settings['direct_upload']).to be false
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment