Commit c601ca55 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'geo-transfer-support' into 'master'

Basic support for GitLab Geo transfers

See merge request !1237
parents b77c1ae7 1aff5fe4
......@@ -15,12 +15,22 @@ class GeoNode < ActiveRecord::Base
# The uniqueness check on `primary` only runs when this node is itself primary.
validates :primary, uniqueness: { message: 'node already exists' }, if: :primary
validates :schema, inclusion: %w(http https)
validates :relative_url_root, length: { minimum: 0, allow_nil: false }
# Both halves of the access key pair must be set. The ensure_access_keys!
# before_validation callback registered below generates them when missing.
validates :access_key, presence: true
validates :encrypted_secret_access_key, presence: true
after_initialize :build_dependents
after_save :refresh_bulk_notify_worker_status
after_destroy :refresh_bulk_notify_worker_status
before_validation :update_dependents_attributes
before_validation :ensure_access_keys!
# secret_access_key is persisted encrypted: AES-256-GCM keyed with
# db_key_base, using the :per_attribute_iv mode (the IV is stored alongside
# the ciphertext) and encoded for storage in a string column.
attr_encrypted :secret_access_key,
key: Gitlab::Application.secrets.db_key_base,
algorithm: 'aes-256-gcm',
mode: :per_attribute_iv,
encode: true
def uri
if relative_url_root
relative_url = relative_url_root.starts_with?('/') ? relative_url_root : "/#{relative_url_root}"
......@@ -42,15 +52,19 @@ class GeoNode < ActiveRecord::Base
end
# Returns the URL (String) of this node's Geo API endpoint used to notify it
# that projects must be refreshed.
def notify_projects_url
  geo_api_url('refresh_projects')
end
# Returns the URL (String) of this node's Geo API endpoint used to notify it
# that wikis must be refreshed.
def notify_wikis_url
  geo_api_url('refresh_wikis')
end
# Returns the URL (String) of this node's Geo API endpoint that receives
# Geo events.
def geo_events_url
  geo_api_url('receive_events')
end
# Returns the URL (String) of the Geo transfer endpoint for one file.
#
# file_type - kind of file being transferred (e.g. :lfs)
# file_id   - identifier of the file, interpolated into the path
def geo_transfers_url(file_type, file_id)
  transfer_path = "transfers/#{file_type}/#{file_id}"
  geo_api_url(transfer_path)
end
def oauth_callback_url
......@@ -73,6 +87,19 @@ class GeoNode < ActiveRecord::Base
private
# Builds the absolute URL (String) of a Geo API endpoint on this node.
#
# suffix - path below "api/<version>/geo/", e.g. 'refresh_projects'
def geo_api_url(suffix)
  base = uri
  endpoint = "api/#{API::API.version}/geo/#{suffix}"

  # The trailing slash on the base path keeps URI.join from replacing the
  # last path segment when the relative endpoint is appended.
  URI.join(base, "#{base.path}/", endpoint).to_s
end
# Generates a fresh access key pair unless both the access key and the
# encrypted secret access key are already present.
def ensure_access_keys!
  keys_assigned = access_key.present? && encrypted_secret_access_key.present?

  unless keys_assigned
    generated = Gitlab::Geo.generate_access_keys
    self.access_key = generated[:access_key]
    self.secret_access_key = generated[:secret_access_key]
  end
end
def url_helper_args
if relative_url_root
relative_url = relative_url_root.starts_with?('/') ? relative_url_root : "/#{relative_url_root}"
......
module Geo
  # Validates a Geo file transfer request and locates the file to send.
  #
  # The request is authenticated by decoding the Authorization header with
  # Gitlab::Geo::JwtRequestDecoder; the decoded data is then checked against
  # the requested record.
  class FileUploadService
    IAT_LEEWAY = 60.seconds.to_i

    attr_reader :params, :auth_header

    # params      - request parameters; :type and :id are read
    # auth_header - raw value of the Authorization header (may be nil)
    def initialize(params, auth_header)
      @params = params
      @auth_header = auth_header
    end

    # Returns:
    #   nil  - when the Authorization header could not be decoded
    #   {}   - when params[:type] is not a supported transfer type
    #   Hash - { code:, message: } and, on success, { code: :ok, file: }
    #          where file is the LFS object's file (CarrierWave file object)
    def execute
      data = ::Gitlab::Geo::JwtRequestDecoder.new(auth_header).decode

      return unless data.present?

      case params[:type]
      when 'lfs'
        handle_lfs_geo_request(params[:id], data)
      else
        {}
      end
    end

    # Looks up the LFS object and checks it against the signed request data.
    #
    # id      - database ID of the LFS object
    # message - decoded request data; message[:sha256] must equal the
    #           object's oid
    #
    # Returns a status Hash; code is :not_found unless every check passes.
    def handle_lfs_geo_request(id, message)
      status = { code: :not_found, message: 'LFS object not found' }

      # find_by returns nil for an unknown ID. `find` would raise
      # ActiveRecord::RecordNotFound and make the guard below unreachable.
      lfs_object = LfsObject.find_by(id: id)

      return status unless lfs_object.present?

      # Same generic status on a checksum mismatch.
      return status if message[:sha256] != lfs_object.oid

      unless lfs_object.file.present? && lfs_object.file.exists?
        status[:message] = "LFS object does not have a file"
        return status
      end

      status[:code] = :ok
      status[:message] = "Success"
      status[:file] = lfs_object.file
      status
    end
  end
end
# Adds the access key pair columns to geo_nodes and generates a key pair for
# every existing node.
class AddAccessKeysToGeoNodes < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  # Minimal stand-in used only to encrypt the generated secret. Its
  # attr_encrypted options mirror the ones declared for
  # GeoNode#secret_access_key, so the migration does not load the model.
  class EncryptedData
    extend AttrEncrypted
    attr_accessor :data
    attr_encrypted :data,
                   key: Gitlab::Application.secrets.db_key_base,
                   algorithm: 'aes-256-gcm',
                   mode: :per_attribute_iv,
                   encode: true
  end

  def up
    add_column :geo_nodes, :access_key, :string
    add_column :geo_nodes, :encrypted_secret_access_key, :string
    add_column :geo_nodes, :encrypted_secret_access_key_iv, :string
    add_concurrent_index :geo_nodes, :access_key

    populate_secret_keys
  end

  def down
    # Drop the index before its column. Removing the column first can drop
    # the index implicitly, after which remove_index fails because the index
    # no longer exists.
    remove_index :geo_nodes, :access_key if index_exists?(:geo_nodes, :access_key)

    remove_column :geo_nodes, :access_key
    remove_column :geo_nodes, :encrypted_secret_access_key
    remove_column :geo_nodes, :encrypted_secret_access_key_iv
  end

  private

  # Generates and stores an access key pair for every existing geo_nodes row.
  # Uses raw SQL instead of the GeoNode model.
  def populate_secret_keys
    select_all("SELECT id FROM geo_nodes").each do |node|
      id = node['id']
      keys = Gitlab::Geo.generate_access_keys

      encrypted = EncryptedData.new
      encrypted.data = keys[:secret_access_key]

      query = %(
        UPDATE geo_nodes
        SET access_key = #{quote(keys[:access_key])},
        encrypted_secret_access_key = #{quote(encrypted.encrypted_data)},
        encrypted_secret_access_key_iv = #{quote(encrypted.encrypted_data_iv)}
        WHERE id = #{id}
      ).squish

      execute(query)
    end
  end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170217151947) do
ActiveRecord::Schema.define(version: 20170224075132) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -521,8 +521,12 @@ ActiveRecord::Schema.define(version: 20170217151947) do
t.integer "geo_node_key_id"
t.integer "oauth_application_id"
t.integer "system_hook_id"
t.string "access_key"
t.string "encrypted_secret_access_key"
t.string "encrypted_secret_access_key_iv"
end
add_index "geo_nodes", ["access_key"], name: "index_geo_nodes_on_access_key", using: :btree
add_index "geo_nodes", ["host"], name: "index_geo_nodes_on_host", using: :btree
add_index "geo_nodes", ["primary"], name: "index_geo_nodes_on_primary", using: :btree
......
......@@ -81,3 +81,42 @@ take any extra step for that.
We do use our feature toggle `.secondary?` to coordinate Git operations and do
the correct authorization (denying writing on any secondary node).
## File Transfers
Secondary Geo Nodes need to transfer files, such as LFS objects, attachments, avatars,
etc. from the primary. To do this, each secondary node has a separate tracking database
that records which objects it needs to transfer.
Files are copied via HTTP(s) and initiated via the
`/api/v4/geo/transfers/:type/:id` endpoint.
### Authentication
To authenticate file transfers, each GeoNode has two fields:
1. A public access key (`access_key`)
2. A secret access key (`secret_access_key`)
The secondary authenticates itself via a [JWT request](https://jwt.io/). When the
secondary wishes to download a file, it sends an HTTP request with the `Authorization`
header:
```
Authorization: GL-Geo <access_key>:<JWT payload>
```
The primary uses the `access_key` to look up the corresponding Geo node and
verify the signature of the JWT payload, which contains additional information to identify the
file request. This ensures that the secondary downloads the right file for the
right database ID. For example, for an LFS object, the request must also
include the SHA256 of the file. An example JWT payload looks like:
```
{ "data": { "sha256": "31806bb23580caab78040f8c45d329f5016b0115" }, "iat": 1234567890 }
```
If the data checks out, then the Geo primary sends data via the
[XSendfile](https://www.nginx.com/resources/wiki/start/topics/examples/xsendfile/)
feature, which allows nginx to handle the file transfer without tying up Rails
or Workhorse.
module API
class Geo < Grape::API
resource :geo do
# Verify the GitLab Geo transfer request is valid
# All transfers use the Authorization header to pass a JWT
# payload.
#
# For LFS objects, validate the object ID exists in the DB
# and that the object ID matches the requested ID. This is
# a sanity check against some malicious client requesting
# a random file path.
params do
  requires :type, type: String, desc: 'File transfer type (e.g. lfs)'
  requires :id, type: Integer, desc: 'The DB ID of the file'
end
# Serves the requested file when the Authorization header validates.
# Responds with "unauthorized" when the service returns nothing usable;
# otherwise uses the service's status code and returns its status hash.
get 'transfers/:type/:id' do
  result = ::Geo::FileUploadService.new(params, headers['Authorization']).execute

  unauthorized! if result.blank?

  if result[:code] != :ok
    status result[:code]
    result
  else
    upload = result[:file]
    present_file!(upload.path, upload.filename)
  end
end
# Enqueue a batch of IDs of wiki's projects to have their
# wiki repositories updated
#
......
......@@ -59,5 +59,18 @@ module Gitlab
RequestStore.fetch(key) { yield }
end
# Generates a new key pair, modelled on S3-style credentials.
#
# Returns a Hash with a 20-character :access_key and a 40-character
# :secret_access_key.
def self.generate_access_keys
  public_key = generate_random_string(20)
  secret_key = generate_random_string(40)

  { access_key: public_key, secret_access_key: secret_key }
end
# Returns a random URL-safe string of exactly `size` characters.
def self.generate_random_string(size)
  # The Base64 encoding of `size` random bytes is longer than `size`
  # characters (roughly size * 4/3), so cut it down to the requested length.
  token = SecureRandom.urlsafe_base64(size)
  token.slice(0, size)
end
end
end
module Gitlab
  module Geo
    # Decodes and verifies the Authorization header of a Geo transfer request.
    class JwtRequestDecoder
      # Leeway, in seconds, passed to the JWT library when it verifies the
      # token's `iat` (issued at) claim.
      IAT_LEEWAY = 60.seconds.to_i

      attr_reader :auth_header

      # auth_header - raw value of the Authorization header (may be nil)
      def initialize(auth_header)
        @auth_header = auth_header
      end

      # Returns the request data as a Hash with symbolized keys, or nil when
      # the header is missing, malformed, refers to an unknown access key, or
      # carries a token that cannot be verified.
      def decode
        decode_geo_request
      end

      private

      def decode_geo_request
        # A Geo transfer request has an Authorization header:
        #   Authorization: GL-Geo <Geo Access Key>:<JWT payload>
        #
        # For example:
        #   JWT payload = { "data": "{\"oid\":\"12345\"}", "iat": 123456 }
        #
        # `data` is itself a JSON-encoded string, hence the JSON.parse below.
        credentials = decode_auth_header

        return unless credentials.present?

        secret, encoded_message = credentials

        begin
          decoded = JWT.decode(
            encoded_message,
            secret,
            true,
            { iat_leeway: IAT_LEEWAY, verify_iat: true, algorithm: 'HS256' }
          )

          message = decoded.first
          request_data = JSON.parse(message['data']) if message
          request_data&.deep_symbolize_keys!
          request_data
        rescue JWT::DecodeError, JSON::ParserError, TypeError => e
          # JSON::ParserError / TypeError cover a verified token whose `data`
          # claim is missing or is not valid JSON.
          Rails.logger.error("Error decoding Geo transfer request: #{e}")

          # Return nil explicitly. Otherwise the logger call's return value
          # would be returned, and callers testing `present?` could treat a
          # failed decode as a success.
          nil
        end
      end

      # Looks up the secret of the Geo node owning `access_key`.
      # Returns nil when no such node exists.
      def hmac_secret(access_key)
        @hmac_secret ||= begin
          geo_node = GeoNode.find_by(access_key: access_key)
          geo_node&.secret_access_key
        end
      end

      # Splits the header into its parts.
      #
      # Returns [secret, encoded_message], or nil when the header is absent,
      # not of the expected token type, malformed, or no secret is found.
      def decode_auth_header
        return unless auth_header.present?

        tokens = auth_header.split(' ')

        return unless tokens.count == 2
        return unless tokens[0] == Gitlab::Geo::TransferRequest::GITLAB_GEO_AUTH_TOKEN_TYPE

        # Split at the first occurrence of a colon
        geo_tokens = tokens[1].split(':', 2)

        return unless geo_tokens.count == 2

        access_key = geo_tokens[0]
        encoded_message = geo_tokens[1]
        secret = hmac_secret(access_key)

        return unless secret.present?

        [secret, encoded_message]
      end
    end
  end
end
module Gitlab
  module Geo
    # Transfer of a single LFS object, written to the object's file path.
    class LfsTransfer < Transfer
      # lfs_object - the LFS object to transfer; its id, file path and oid
      #              are read here.
      def initialize(lfs_object)
        super(:lfs, lfs_object.id, lfs_object.file.path, lfs_request_data(lfs_object))
      end

      private

      # Data sent along with the request: the object's oid as :sha256.
      def lfs_request_data(lfs_object)
        { sha256: lfs_object.oid }
      end
    end
  end
end
module Gitlab
  module Geo
    # Downloads a single file from the Geo primary node to `filename`.
    class Transfer
      attr_reader :file_type, :file_id, :filename, :request_data

      # file_type    - type segment of the transfer URL (e.g. :lfs)
      # file_id      - ID of the file on the primary
      # filename     - local path the downloaded file is written to
      # request_data - data passed to TransferRequest to build the headers
      def initialize(file_type, file_id, filename, request_data)
        @file_type = file_type
        @file_id = file_id
        @filename = filename
        @request_data = request_data
      end

      # Returns the number of bytes downloaded, or -1 if the download was
      # attempted but unsuccessful.
      #
      # Returns nil when no download was attempted: this node is not a
      # secondary, `filename` is a directory, there is no primary node, or
      # the target directory could not be prepared.
      def download_from_primary
        return unless Gitlab::Geo.secondary?
        return if File.directory?(filename)

        primary = Gitlab::Geo.primary_node

        return unless primary

        url = primary.geo_transfers_url(file_type, file_id.to_s)
        req_header = TransferRequest.new(request_data).header

        return unless ensure_path_exists

        download_file(url, req_header)
      end

      private

      # Makes sure the directory that will contain `filename` exists.
      # Returns true on success; logs and returns false otherwise.
      def ensure_path_exists
        path = Pathname.new(filename)
        dir = path.dirname

        return true if File.directory?(dir)

        if File.exist?(dir)
          log_transfer_error("#{dir} is not a directory, unable to save #{filename}")
          return false
        end

        begin
          FileUtils.mkdir_p(dir)
        rescue => e
          log_transfer_error("unable to create directory #{dir}: #{e}")
          return false
        end

        true
      end

      def log_transfer_error(message)
        Rails.logger.error("#{self.class.name}: #{message}")
      end

      # Use HTTParty for now but switch to curb if performance becomes
      # an issue
      #
      # Streams the response body into `filename`. Returns the size in bytes
      # of the written file, or -1 on any failure. A file opened by a failed
      # attempt is removed so no partial or error content is left behind.
      def download_file(url, req_header)
        file_size = -1
        opened = false
        downloaded = false

        begin
          File.open(filename, "w") do |file|
            opened = true

            # Transferred files are arbitrary binary data: write bytes as-is
            # instead of subjecting them to text-mode encoding conversion.
            file.binmode

            response = HTTParty.get(url, headers: req_header, stream_body: true) do |fragment|
              file.write(fragment)
            end

            downloaded = response.success?

            unless downloaded
              log_transfer_error("Unsuccessful download: #{response.code} #{response.msg}")
            end
          end

          # Measure only after the file has been closed. While it is open,
          # buffered data may not have reached the disk yet.
          if downloaded
            file_size = File.stat(filename).size
            Rails.logger.info("GitLab Geo: Successfully downloaded #{filename} (#{file_size} bytes)")
          end
        rescue StandardError => e
          # HTTParty::Error is a StandardError, so this also covers it.
          log_transfer_error("Error downloading file: #{e}")
        end

        # Only remove a file this attempt actually opened (and truncated).
        FileUtils.rm_f(filename) if opened && file_size < 0

        file_size
      end
    end
  end
end
module Gitlab
  module Geo
    # Builds the HTTP headers that authenticate a Geo transfer request.
    class TransferRequest
      GITLAB_GEO_AUTH_TOKEN_TYPE = 'GL-Geo'.freeze

      attr_reader :request_data

      # request_data - data identifying the requested file; serialized to
      #                JSON and embedded in the signed token
      def initialize(request_data)
        @request_data = request_data
      end

      # Returns the request headers as a Hash. "Authorization" is nil when
      # there is no requesting node.
      def header
        authorization = geo_transfer_auth(request_data.to_json)

        { "Authorization" => authorization, "X-Sendfile-Type" => "X-Sendfile" }
      end

      private

      # Signs `message` with the requesting node's secret (HS256) and returns
      # "<token type> <access key>:<token>", or nil without a node.
      def geo_transfer_auth(message)
        node = requesting_node

        if node
          claims = { data: message, iat: Time.now.to_i }
          signed = JWT.encode(claims, node.secret_access_key, 'HS256')

          [GITLAB_GEO_AUTH_TOKEN_TYPE, "#{node.access_key}:#{signed}"].join(' ')
        end
      end

      def requesting_node
        Gitlab::Geo.current_node
      end
    end
  end
end
......@@ -8,5 +8,9 @@ FactoryGirl.define do
primary true
port { Gitlab.config.gitlab.port }
end
trait :current do
port { Gitlab.config.gitlab.port }
end
end
end
require 'spec_helper'
# Round-trip specs: an Authorization header built by TransferRequest is fed
# to the decoder.
describe Gitlab::Geo::JwtRequestDecoder do
# Created eagerly (let!) so the node exists before any header is built.
let!(:primary_node) { FactoryGirl.create(:geo_node, :primary) }
let(:data) { { input: 123 } }
let(:request) { Gitlab::Geo::TransferRequest.new(data) }
subject { described_class.new(request.header['Authorization']) }
describe '#decode' do
it 'decodes correct data' do
expect(subject.decode).to eq(data)
end
it 'fails to decode with wrong key' do
# Build the header first, then blank the node's secret so the header can
# no longer be matched to a usable secret. Note the local `data` shadows
# the `data` let and holds the header string here.
data = request.header['Authorization']
primary_node.secret_access_key = ''
primary_node.save
expect(described_class.new(data).decode).to be_nil
end
end
end
require 'spec_helper'
# Exercises Transfer#download_from_primary with the HTTP call and the file
# write both stubbed out.
describe Gitlab::Geo::Transfer do
let!(:primary_node) { FactoryGirl.create(:geo_node, :primary) }
let!(:secondary_node) { FactoryGirl.create(:geo_node) }
let(:lfs_object) { create(:lfs_object, :with_file) }
let(:url) { primary_node.geo_transfers_url(:lfs, lfs_object.id.to_s) }
let(:content) { StringIO.new("1\n2\n3") }
# Expected result: the size on disk of the LFS object's existing file.
let(:size) { File.stat(lfs_object.file.path).size }
before do
# Opening the target path for writing yields the StringIO instead of a
# real file handle.
allow(File).to receive(:open).with(lfs_object.file.path, "w").and_yield(content)
end
subject do
described_class.new(:lfs,
lfs_object.id,
lfs_object.file.path,
{ sha256: lfs_object.oid })
end
it '#download_from_primary' do
# Act as the secondary node and fake a successful HTTP response.
allow(Gitlab::Geo).to receive(:current_node) { secondary_node }
response = double(success?: true)
expect(HTTParty).to receive(:get).and_return(response)
expect(subject.download_from_primary).to eq(size)
end
end
......@@ -102,4 +102,13 @@ describe Gitlab::Geo, lib: true do
expect(described_class.license_allows?).to be_falsey
end
end
describe '.generate_access_keys' do
it 'returns a public and secret access key' do
keys = described_class.generate_access_keys
expect(keys[:access_key].length).to eq(20)
expect(keys[:secret_access_key].length).to eq(40)
end
end
end
......@@ -182,6 +182,14 @@ describe GeoNode, type: :model do
end
end
describe '#geo_transfers_url' do
let(:transfers_url) { "https://localhost:3000/gitlab/api/#{api_version}/geo/transfers/lfs/1" }
it 'returns api url based on node uri' do
expect(new_node.geo_transfers_url(:lfs, 1)).to eq(transfers_url)
end
end
describe '#oauth_callback_url' do
let(:oauth_callback_url) { 'https://localhost:3000/gitlab/oauth/geo/callback' }
......
......@@ -4,15 +4,11 @@ describe API::Geo, api: true do
include ApiHelpers
let(:admin) { create(:admin) }
let(:user) { create(:user) }
let(:geo_node) { build(:geo_node) }
let!(:geo_node) { create(:geo_node, :primary) }
let(:geo_token_header) do
{ 'X-Gitlab-Token' => geo_node.system_hook.token }
end
before(:each) do
allow(Gitlab::Geo).to receive(:current_node) { geo_node }
end
describe 'POST /geo/receive_events authentication' do
it 'denies access if token is not present' do
post api('/geo/receive_events')
......@@ -107,4 +103,41 @@ describe API::Geo, api: true do
expect(response.status).to eq 201
end
end
describe 'GET /geo/transfers/lfs/1' do
let!(:secondary_node) { create(:geo_node) }
let(:lfs_object) { create(:lfs_object, :with_file) }
let(:req_header) do
transfer = Gitlab::Geo::LfsTransfer.new(lfs_object)
Gitlab::Geo::TransferRequest.new(transfer.request_data).header
end
before do
allow_any_instance_of(Gitlab::Geo::TransferRequest).to receive(:requesting_node).and_return(secondary_node)
end
it 'responds with 401 with invalid auth header' do
get api("/geo/transfers/lfs/#{lfs_object.id}"), nil, Authorization: 'Test'
expect(response.status).to eq 401
end
context 'LFS file exists' do
it 'responds with 200 with X-Sendfile' do
get api("/geo/transfers/lfs/#{lfs_object.id}"), nil, req_header
expect(response.status).to eq 200
expect(response.headers['Content-Type']).to eq('application/octet-stream')
expect(response.headers['X-Sendfile']).to eq(lfs_object.file.path)
end
end
context 'LFS object does not exist' do
it 'responds with 404' do
get api("/geo/transfers/lfs/100000"), nil, req_header
expect(response.status).to eq 404
end
end
end
end
require 'spec_helper'
# Specs for Geo::FileUploadService#execute using an Authorization header
# produced by TransferRequest for an LFS transfer.
describe Geo::FileUploadService, services: true do
let(:lfs_object) { create(:lfs_object, :with_file) }
let(:params) { { id: lfs_object.id, type: 'lfs' } }
let(:lfs_transfer) { Gitlab::Geo::LfsTransfer.new(lfs_object) }
let(:transfer_request) { Gitlab::Geo::TransferRequest.new(lfs_transfer.request_data) }
# Only the Authorization value is passed to the service, not the full
# header hash.
let(:req_header) { transfer_request.header['Authorization'] }
before do
create(:geo_node, :current)
end
describe '#execute' do
it 'sends LFS file' do
service = described_class.new(params, req_header)
response = service.execute
expect(response[:code]).to eq(:ok)
expect(response[:file].file.path).to eq(lfs_object.file.path)
end
it 'returns nil if no authorization' do
service = described_class.new(params, nil)
expect(service.execute).to be_nil
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment