Commit 0eff75fa authored by Nick Thomas's avatar Nick Thomas

Cache branch and tag names as Redis sets

This allows us to check inclusion for the *_exists? methods without
downloading the full list of branch names, which is over 100KiB in size
for gitlab-ce at the moment.
parent 648e5a94
......@@ -239,13 +239,13 @@ class Repository
def branch_exists?(branch_name)
return false unless raw_repository
branch_names.include?(branch_name)
branch_names_include?(branch_name)
end
def tag_exists?(tag_name)
return false unless raw_repository
tag_names.include?(tag_name)
tag_names_include?(tag_name)
end
def ref_exists?(ref)
......@@ -561,10 +561,10 @@ class Repository
end
delegate :branch_names, to: :raw_repository
cache_method :branch_names, fallback: []
cache_method_as_redis_set :branch_names, fallback: []
delegate :tag_names, to: :raw_repository
cache_method :tag_names, fallback: []
cache_method_as_redis_set :tag_names, fallback: []
delegate :branch_count, :tag_count, :has_visible_content?, to: :raw_repository
cache_method :branch_count, fallback: 0
......@@ -1126,6 +1126,10 @@ class Repository
@cache ||= Gitlab::RepositoryCache.new(self)
end
def redis_set_cache
@redis_set_cache ||= Gitlab::RepositorySetCache.new(self)
end
def request_store_cache
@request_store_cache ||= Gitlab::RepositoryCache.new(self, backend: Gitlab::SafeRequestStore)
end
......
---
title: Cache branch and tag names as Redis sets
merge_request: 30476
author:
type: performance
......@@ -23,6 +23,37 @@ module Gitlab
end
end
# Caches and strongly memoizes the method as a Redis Set.
#
# This only works for methods that do not take any arguments. The method
# should return an Array of Strings to be cached.
#
# In addition to overriding the named method, a "name_include?" method is
# defined. This uses the "SISMEMBER" query to efficiently check membership
# without needing to load the entire set into memory.
#
# name - The name of the method to be cached.
# fallback - A value to fall back to if the repository does not exist, or
# in case of a Git error. Defaults to nil.
def cache_method_as_redis_set(name, fallback: nil)
uncached_name = alias_uncached_method(name)
define_method(name) do
cache_method_output_as_redis_set(name, fallback: fallback) do
__send__(uncached_name) # rubocop:disable GitlabSecurity/PublicSend
end
end
define_method("#{name}_include?") do |value|
# If the cache isn't populated, we can't rely on it
return redis_set_cache.include?(name, value) if redis_set_cache.exist?(name)
# Since we have to pull all branch names to populate the cache, use
# the data we already have to answer the query just this once
__send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend
end
end
# Caches truthy values from the method. All values are strongly memoized,
# and cached in RequestStore.
#
......@@ -84,6 +115,11 @@ module Gitlab
raise NotImplementedError
end
# RepositorySetCache to be used. Should be overridden by the including class
def redis_set_cache
raise NotImplementedError
end
# List of cached methods. Should be overridden by the including class
def cached_methods
raise NotImplementedError
......@@ -100,6 +136,18 @@ module Gitlab
end
end
# Caches and strongly memoizes the supplied block as a Redis Set. The result
# will be provided as a sorted array.
#
# name - The name of the method to be cached.
# fallback - A value to fall back to if the repository does not exist, or
# in case of a Git error. Defaults to nil.
def cache_method_output_as_redis_set(name, fallback: nil, &block)
memoize_method_output(name, fallback: fallback) do
redis_set_cache.fetch(name, &block).sort
end
end
# Caches truthy values from the supplied block. All values are strongly
# memoized, and cached in RequestStore.
#
......@@ -154,6 +202,7 @@ module Gitlab
clear_memoization(memoizable_name(name))
end
expire_redis_set_method_caches(methods)
expire_request_store_method_caches(methods)
end
......@@ -169,6 +218,10 @@ module Gitlab
end
end
def expire_redis_set_method_caches(methods)
methods.each { |name| redis_set_cache.expire(name) }
end
# All cached repository methods depend on the existence of a Git repository,
# so if the repository doesn't exist, we already know not to call it.
def fallback_early?(method_name)
......
# frozen_string_literal: true
# Interface to the Redis-backed cache store for keys that use a Redis set
module Gitlab
class RepositorySetCache
attr_reader :repository, :namespace, :expires_in
def initialize(repository, extra_namespace: nil, expires_in: 2.weeks)
@repository = repository
@namespace = "#{repository.full_path}:#{repository.project.id}"
@namespace = "#{@namespace}:#{extra_namespace}" if extra_namespace
@expires_in = expires_in
end
def cache_key(type)
[type, namespace, 'set'].join(':')
end
def expire(key)
with { |redis| redis.del(cache_key(key)) }
end
def exist?(key)
with { |redis| redis.exists(cache_key(key)) }
end
def read(key)
with { |redis| redis.smembers(cache_key(key)) }
end
def write(key, value)
full_key = cache_key(key)
with do |redis|
redis.multi do
redis.del(full_key)
# Splitting into groups of 1000 prevents us from creating a too-long
# Redis command
value.in_groups_of(1000, false) { |subset| redis.sadd(full_key, subset) }
redis.expire(full_key, expires_in)
end
end
value
end
def fetch(key, &block)
if exist?(key)
read(key)
else
write(key, yield)
end
end
def include?(key, value)
with { |redis| redis.sismember(cache_key(key), value) }
end
private
def with(&blk)
Gitlab::Redis::Cache.with(&blk) # rubocop:disable CodeReuse/ActiveRecord
end
end
end
......@@ -4,6 +4,7 @@ describe Gitlab::RepositoryCacheAdapter do
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:cache) { repository.send(:cache) }
let(:redis_set_cache) { repository.send(:redis_set_cache) }
describe '#cache_method_output', :use_clean_rails_memory_store_caching do
let(:fallback) { 10 }
......@@ -206,9 +207,11 @@ describe Gitlab::RepositoryCacheAdapter do
describe '#expire_method_caches' do
it 'expires the caches of the given methods' do
expect(cache).to receive(:expire).with(:rendered_readme)
expect(cache).to receive(:expire).with(:gitignore)
expect(cache).to receive(:expire).with(:branch_names)
expect(redis_set_cache).to receive(:expire).with(:rendered_readme)
expect(redis_set_cache).to receive(:expire).with(:branch_names)
repository.expire_method_caches(%i(rendered_readme gitignore))
repository.expire_method_caches(%i(rendered_readme branch_names))
end
it 'does not expire caches for non-existent methods' do
......
require 'spec_helper'
describe Gitlab::RepositorySetCache, :clean_gitlab_redis_cache do
let(:project) { create(:project) }
let(:repository) { project.repository }
let(:namespace) { "#{repository.full_path}:#{project.id}" }
let(:cache) { described_class.new(repository) }
describe '#cache_key' do
subject { cache.cache_key(:foo) }
it 'includes the namespace' do
is_expected.to eq("foo:#{namespace}:set")
end
context 'with a given namespace' do
let(:extra_namespace) { 'my:data' }
let(:cache) { described_class.new(repository, extra_namespace: extra_namespace) }
it 'includes the full namespace' do
is_expected.to eq("foo:#{namespace}:#{extra_namespace}:set")
end
end
end
describe '#expire' do
it 'expires the given key from the cache' do
cache.write(:foo, ['value'])
expect(cache.read(:foo)).to contain_exactly('value')
expect(cache.expire(:foo)).to eq(1)
expect(cache.read(:foo)).to be_empty
end
end
describe '#exist?' do
it 'checks whether the key exists' do
expect(cache.exist?(:foo)).to be(false)
cache.write(:foo, ['value'])
expect(cache.exist?(:foo)).to be(true)
end
end
describe '#fetch' do
let(:blk) { -> { ['block value'] } }
subject { cache.fetch(:foo, &blk) }
it 'fetches the key from the cache when filled' do
cache.write(:foo, ['value'])
is_expected.to contain_exactly('value')
end
it 'writes the value of the provided block when empty' do
cache.expire(:foo)
is_expected.to contain_exactly('block value')
expect(cache.read(:foo)).to contain_exactly('block value')
end
end
describe '#include?' do
it 'checks inclusion in the Redis set' do
cache.write(:foo, ['value'])
expect(cache.include?(:foo, 'value')).to be(true)
expect(cache.include?(:foo, 'bar')).to be(false)
end
end
end
......@@ -1223,36 +1223,66 @@ describe Repository do
end
describe '#branch_exists?' do
it 'uses branch_names' do
allow(repository).to receive(:branch_names).and_return(['foobar'])
let(:branch) { repository.root_ref }
expect(repository.branch_exists?('foobar')).to eq(true)
expect(repository.branch_exists?('master')).to eq(false)
subject { repository.branch_exists?(branch) }
it 'delegates to branch_names when the cache is empty' do
repository.expire_branches_cache
expect(repository).to receive(:branch_names).and_call_original
is_expected.to eq(true)
end
it 'uses redis set caching when the cache is filled' do
repository.branch_names # ensure the branch name cache is filled
expect(repository)
.to receive(:branch_names_include?)
.with(branch)
.and_call_original
is_expected.to eq(true)
end
end
describe '#tag_exists?' do
it 'uses tag_names' do
allow(repository).to receive(:tag_names).and_return(['foobar'])
let(:tag) { repository.tags.first.name }
expect(repository.tag_exists?('foobar')).to eq(true)
expect(repository.tag_exists?('master')).to eq(false)
subject { repository.tag_exists?(tag) }
it 'delegates to tag_names when the cache is empty' do
repository.expire_tags_cache
expect(repository).to receive(:tag_names).and_call_original
is_expected.to eq(true)
end
it 'uses redis set caching when the cache is filled' do
repository.tag_names # ensure the tag name cache is filled
expect(repository)
.to receive(:tag_names_include?)
.with(tag)
.and_call_original
is_expected.to eq(true)
end
end
describe '#branch_names', :use_clean_rails_memory_store_caching do
describe '#branch_names', :clean_gitlab_redis_cache do
let(:fake_branch_names) { ['foobar'] }
it 'gets cached across Repository instances' do
allow(repository.raw_repository).to receive(:branch_names).once.and_return(fake_branch_names)
expect(repository.branch_names).to eq(fake_branch_names)
expect(repository.branch_names).to match_array(fake_branch_names)
fresh_repository = Project.find(project.id).repository
expect(fresh_repository.object_id).not_to eq(repository.object_id)
expect(fresh_repository.raw_repository).not_to receive(:branch_names)
expect(fresh_repository.branch_names).to eq(fake_branch_names)
expect(fresh_repository.branch_names).to match_array(fake_branch_names)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment