Commit 06538eb1 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'merged-branch-names-cache-v2' into 'master'

Improvement to merged_branch_names cache

See merge request gitlab-org/gitlab!24504
parents 3971e04d 3d027d88
......@@ -65,8 +65,6 @@ class Repository
xcode_config: :xcode_project?
}.freeze
MERGED_BRANCH_NAMES_CACHE_DURATION = 10.minutes
def initialize(full_path, container, disk_path: nil, repo_type: Gitlab::GlRepository::PROJECT)
@full_path = full_path
@disk_path = disk_path || full_path
......@@ -914,6 +912,8 @@ class Repository
@root_ref_sha ||= commit(root_ref).sha
end
# If this method is not provided a set of branch names to check merge status,
# it fetches all branches.
def merged_branch_names(branch_names = [])
# Currently we should skip caching if requesting all branch names
# This is only used in a few places, notably app/services/branches/delete_merged_service.rb,
......@@ -922,30 +922,18 @@ class Repository
skip_cache = branch_names.empty? || Feature.disabled?(:merged_branch_names_redis_caching)
return raw_repository.merged_branch_names(branch_names) if skip_cache
cached_branch_names = cache.read(:merged_branch_names)
merged_branch_names_hash = cached_branch_names || {}
missing_branch_names = branch_names.select { |bn| !merged_branch_names_hash.key?(bn) }
# Track some metrics here whilst feature flag is enabled
if cached_branch_names.present?
counter = Gitlab::Metrics.counter(
:gitlab_repository_merged_branch_names_cache_hit,
"Count of cache hits for Repository#merged_branch_names"
)
counter.increment(full_hit: missing_branch_names.empty?)
end
cache = redis_hash_cache
if missing_branch_names.any?
merged_branch_names_hash = cache.fetch_and_add_missing(:merged_branch_names, branch_names) do |missing_branch_names, hash|
merged = raw_repository.merged_branch_names(missing_branch_names)
missing_branch_names.each do |bn|
merged_branch_names_hash[bn] = merged.include?(bn)
# Redis only stores strings in hset keys, use a fancy encoder
hash[bn] = Gitlab::Redis::Boolean.new(merged.include?(bn))
end
cache.write(:merged_branch_names, merged_branch_names_hash, expires_in: MERGED_BRANCH_NAMES_CACHE_DURATION)
end
Set.new(merged_branch_names_hash.select { |_, v| v }.keys)
Set.new(merged_branch_names_hash.select { |_, v| Gitlab::Redis::Boolean.true?(v) }.keys)
end
def merge_base(*commits_or_ids)
......@@ -1168,6 +1156,10 @@ class Repository
@redis_set_cache ||= Gitlab::RepositorySetCache.new(self)
end
def redis_hash_cache
@redis_hash_cache ||= Gitlab::RepositoryHashCache.new(self)
end
def request_store_cache
@request_store_cache ||= Gitlab::RepositoryCache.new(self, backend: Gitlab::SafeRequestStore)
end
......
---
title: Improvement to merged_branch_names cache
merge_request: 24504
author:
type: performance
# frozen_string_literal: true
# A serializer for boolean values being stored in Redis.
#
# This is to ensure that booleans are stored in a consistent and
# testable way when being stored as strings in Redis.
#
# Examples:
#
# bool = Gitlab::Redis::Boolean.new(true)
# bool.to_s == "_b:1"
#
# Gitlab::Redis::Boolean.encode(true)
# => "_b:1"
#
# Gitlab::Redis::Boolean.decode("_b:1")
# => true
#
# Gitlab::Redis::Boolean.true?("_b:1")
# => true
#
# Gitlab::Redis::Boolean.true?("_b:0")
# => false
module Gitlab
module Redis
class Boolean
LABEL = "_b"
DELIMITER = ":"
TRUE_STR = "1"
FALSE_STR = "0"
BooleanError = Class.new(StandardError)
NotABooleanError = Class.new(BooleanError)
NotAnEncodedBooleanStringError = Class.new(BooleanError)
def initialize(value)
@value = value
end
# @return [String] the encoded boolean
def to_s
self.class.encode(@value)
end
class << self
# Turn a boolean into a string for storage in Redis
#
# @param value [Boolean] true or false
# @return [String] the encoded boolean
# @raise [NotABooleanError] if the value isn't true or false
def encode(value)
raise NotABooleanError.new(value) unless bool?(value)
[LABEL, to_string(value)].join(DELIMITER)
end
# Decode a boolean string
#
# @param value [String] the stored boolean string
# @return [Boolean] true or false
# @raise [NotAnEncodedBooleanStringError] if the provided value isn't an encoded boolean
def decode(value)
raise NotAnEncodedBooleanStringError.new(value.class) unless value.is_a?(String)
label, bool_str = *value.split(DELIMITER, 2)
raise NotAnEncodedBooleanStringError.new(label) unless label == LABEL
from_string(bool_str)
end
# Decode a boolean string, then test if it's true
#
# @param value [String] the stored boolean string
# @return [Boolean] is the value true?
# @raise [NotAnEncodedBooleanStringError] if the provided value isn't an encoded boolean
def true?(encoded_value)
decode(encoded_value)
end
# Decode a boolean string, then test if it's false
#
# @param value [String] the stored boolean string
# @return [Boolean] is the value false?
# @raise [NotAnEncodedBooleanStringError] if the provided value isn't an encoded boolean
def false?(encoded_value)
!true?(encoded_value)
end
private
def bool?(value)
[true, false].include?(value)
end
def to_string(bool)
bool ? TRUE_STR : FALSE_STR
end
def from_string(str)
raise NotAnEncodedBooleanStringError.new(str) unless [TRUE_STR, FALSE_STR].include?(str)
str == TRUE_STR
end
end
end
end
end
......@@ -132,6 +132,11 @@ module Gitlab
raise NotImplementedError
end
# RepositoryHashCache to be used. Should be overridden by the including class
def redis_hash_cache
raise NotImplementedError
end
# List of cached methods. Should be overridden by the including class
def cached_methods
raise NotImplementedError
......@@ -215,6 +220,7 @@ module Gitlab
end
expire_redis_set_method_caches(methods)
expire_redis_hash_method_caches(methods)
expire_request_store_method_caches(methods)
end
......@@ -234,6 +240,10 @@ module Gitlab
methods.each { |name| redis_set_cache.expire(name) }
end
def expire_redis_hash_method_caches(methods)
methods.each { |name| redis_hash_cache.delete(name) }
end
# All cached repository methods depend on the existence of a Git repository,
# so if the repository doesn't exist, we already know not to call it.
def fallback_early?(method_name)
......
# frozen_string_literal: true
# Interface to the Redis-backed cache store for keys that use a Redis HSET.
# This is currently used as an incremental cache by the `Repository` model
# for `#merged_branch_names`. It works slightly differently to the other
# repository cache classes in that it is intended to work with partial
# caches which can be updated with new data, using the Redis hash system.
module Gitlab
class RepositoryHashCache
attr_reader :repository, :namespace, :expires_in
RepositoryHashCacheError = Class.new(StandardError)
InvalidKeysProvidedError = Class.new(RepositoryHashCacheError)
InvalidHashProvidedError = Class.new(RepositoryHashCacheError)
# @param repository [Repository]
# @param extra_namespace [String]
# @param expires_in [Integer] expiry time for hash store keys
def initialize(repository, extra_namespace: nil, expires_in: 1.day)
@repository = repository
@namespace = "#{repository.full_path}"
@namespace += ":#{repository.project.id}" if repository.project
@namespace = "#{@namespace}:#{extra_namespace}" if extra_namespace
@expires_in = expires_in
end
# @param type [String]
# @return [String]
def cache_key(type)
"#{type}:#{namespace}:hash"
end
# @param key [String]
# @return [Integer] 0 or 1 depending on success
def delete(key)
with { |redis| redis.del(cache_key(key)) }
end
# Check if the provided hash key exists in the hash.
#
# @param key [String]
# @param h_key [String] the key to check presence in Redis
# @return [True, False]
def key?(key, h_key)
with { |redis| redis.hexists(cache_key(key), h_key) }
end
# Read the values of a set of keys from the hash store, and return them
# as a hash of those keys and their values.
#
# @param key [String]
# @param hash_keys [Array<String>] an array of keys to retrieve from the store
# @return [Hash] a Ruby hash of the provided keys and their values from the store
def read_members(key, hash_keys = [])
raise InvalidKeysProvidedError unless hash_keys.is_a?(Array) && hash_keys.any?
with do |redis|
# Fetch an array of values for the supplied keys
values = redis.hmget(cache_key(key), hash_keys)
# Turn it back into a hash
hash_keys.zip(values).to_h
end
end
# Write a hash to the store. All keys and values will be strings when stored.
#
# @param key [String]
# @param hash [Hash] the hash to be written to Redis
# @return [Boolean] whether all operations were successful or not
def write(key, hash)
raise InvalidHashProvidedError unless hash.is_a?(Hash) && hash.any?
full_key = cache_key(key)
with do |redis|
results = redis.pipelined do
# Set each hash key to the provided value
hash.each do |h_key, h_value|
redis.hset(full_key, h_key, h_value)
end
# Update the expiry time for this hset
redis.expire(full_key, expires_in)
end
results.all?
end
end
# A variation on the `fetch` pattern of other cache stores. This method
# allows you to attempt to fetch a group of keys from the hash store, and
# for any keys that are missing values a block is then called to provide
# those values, which are then written back into Redis. Both sets of data
# are then combined and returned as one hash.
#
# @param key [String]
# @param h_keys [Array<String>] the keys to fetch or add to the cache
# @yieldparam missing_keys [Array<String>] the keys missing from the cache
# @yieldparam new_values [Hash] the hash to be populated by the block
# @return [Hash] the amalgamated hash of cached and uncached values
def fetch_and_add_missing(key, h_keys, &block)
# Check the cache for all supplied keys
cache_values = read_members(key, h_keys)
# Find the results which returned nil (meaning they're not in the cache)
missing = cache_values.select { |_, v| v.nil? }.keys
if missing.any?
new_values = {}
# Run the block, which updates the new_values hash
yield(missing, new_values)
# Ensure all values are converted to strings, to ensure merging hashes
# below returns standardised data.
new_values = standardize_hash(new_values)
# Write the new values to the hset
write(key, new_values)
# Merge the two sets of values to return a complete hash
cache_values.merge!(new_values)
end
record_metrics(key, cache_values, missing)
cache_values
end
private
def with(&blk)
Gitlab::Redis::Cache.with(&blk) # rubocop:disable CodeReuse/ActiveRecord
end
# Take a hash and convert both keys and values to strings, for insertion into Redis.
#
# @param hash [Hash]
# @return [Hash] the stringified hash
def standardize_hash(hash)
hash.map { |k, v| [k.to_s, v.to_s] }.to_h
end
# Record metrics in Prometheus.
#
# @param key [String] the basic key, e.g. :merged_branch_names. Not record-specific.
# @param cache_values [Hash] the hash response from the cache read
# @param missing_keys [Array<String>] the array of missing keys from the cache read
def record_metrics(key, cache_values, missing_keys)
cache_hits = cache_values.delete_if { |_, v| v.nil? }
# Increment the counter if we have hits
metrics_hit_counter.increment(full_hit: missing_keys.empty?, store_type: key) if cache_hits.any?
# Track the number of hits we got
metrics_hit_histogram.observe({ type: "hits", store_type: key }, cache_hits.size)
metrics_hit_histogram.observe({ type: "misses", store_type: key }, missing_keys.size)
end
def metrics_hit_counter
@counter ||= Gitlab::Metrics.counter(
:gitlab_repository_hash_cache_hit,
"Count of cache hits in Redis HSET"
)
end
def metrics_hit_histogram
@histogram ||= Gitlab::Metrics.histogram(
:gitlab_repository_hash_cache_size,
"Number of records in the HSET"
)
end
end
end
# frozen_string_literal: true
require "spec_helper"
describe Gitlab::Redis::Boolean do
subject(:redis_boolean) { described_class.new(bool) }
let(:bool) { true }
let(:label_section) { "#{described_class::LABEL}#{described_class::DELIMITER}" }
describe "#to_s" do
subject { redis_boolean.to_s }
context "true" do
let(:bool) { true }
it { is_expected.to eq("#{label_section}#{described_class::TRUE_STR}") }
end
context "false" do
let(:bool) { false }
it { is_expected.to eq("#{label_section}#{described_class::FALSE_STR}") }
end
end
describe ".encode" do
subject { redis_boolean.class.encode(bool) }
context "true" do
let(:bool) { true }
it { is_expected.to eq("#{label_section}#{described_class::TRUE_STR}") }
end
context "false" do
let(:bool) { false }
it { is_expected.to eq("#{label_section}#{described_class::FALSE_STR}") }
end
end
describe ".decode" do
subject { redis_boolean.class.decode(str) }
context "valid encoded bool" do
let(:str) { "#{label_section}#{bool_str}" }
context "true" do
let(:bool_str) { described_class::TRUE_STR }
it { is_expected.to be(true) }
end
context "false" do
let(:bool_str) { described_class::FALSE_STR }
it { is_expected.to be(false) }
end
end
context "partially invalid bool" do
let(:str) { "#{label_section}whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
context "invalid encoded bool" do
let(:str) { "whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
end
describe ".true?" do
subject { redis_boolean.class.true?(str) }
context "valid encoded bool" do
let(:str) { "#{label_section}#{bool_str}" }
context "true" do
let(:bool_str) { described_class::TRUE_STR }
it { is_expected.to be(true) }
end
context "false" do
let(:bool_str) { described_class::FALSE_STR }
it { is_expected.to be(false) }
end
end
context "partially invalid bool" do
let(:str) { "#{label_section}whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
context "invalid encoded bool" do
let(:str) { "whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
end
describe ".false?" do
subject { redis_boolean.class.false?(str) }
context "valid encoded bool" do
let(:str) { "#{label_section}#{bool_str}" }
context "true" do
let(:bool_str) { described_class::TRUE_STR }
it { is_expected.to be(false) }
end
context "false" do
let(:bool_str) { described_class::FALSE_STR }
it { is_expected.to be(true) }
end
end
context "partially invalid bool" do
let(:str) { "#{label_section}whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
context "invalid encoded bool" do
let(:str) { "whoops" }
it "raises an error" do
expect { subject }.to raise_error(described_class::NotAnEncodedBooleanStringError)
end
end
end
end
......@@ -7,6 +7,7 @@ describe Gitlab::RepositoryCacheAdapter do
let(:repository) { project.repository }
let(:cache) { repository.send(:cache) }
let(:redis_set_cache) { repository.send(:redis_set_cache) }
let(:redis_hash_cache) { repository.send(:redis_hash_cache) }
describe '#cache_method_output', :use_clean_rails_memory_store_caching do
let(:fallback) { 10 }
......@@ -212,6 +213,8 @@ describe Gitlab::RepositoryCacheAdapter do
expect(cache).to receive(:expire).with(:branch_names)
expect(redis_set_cache).to receive(:expire).with(:rendered_readme)
expect(redis_set_cache).to receive(:expire).with(:branch_names)
expect(redis_hash_cache).to receive(:delete).with(:rendered_readme)
expect(redis_hash_cache).to receive(:delete).with(:branch_names)
repository.expire_method_caches(%i(rendered_readme branch_names))
end
......
# frozen_string_literal: true
require "spec_helper"
describe Gitlab::RepositoryHashCache, :clean_gitlab_redis_cache do
let_it_be(:project) { create(:project) }
let(:repository) { project.repository }
let(:namespace) { "#{repository.full_path}:#{project.id}" }
let(:cache) { described_class.new(repository) }
let(:test_hash) do
{ "test" => "value" }
end
describe "#cache_key" do
subject { cache.cache_key(:example) }
it "includes the namespace" do
is_expected.to eq("example:#{namespace}:hash")
end
context "with a given namespace" do
let(:extra_namespace) { "my:data" }
let(:cache) { described_class.new(repository, extra_namespace: extra_namespace) }
it "includes the full namespace" do
is_expected.to eq("example:#{namespace}:#{extra_namespace}:hash")
end
end
end
describe "#delete" do
subject { cache.delete(:example) }
context "key exists" do
before do
cache.write(:example, test_hash)
end
it { is_expected.to eq(1) }
it "deletes the given key from the cache" do
subject
expect(cache.read_members(:example, ["test"])).to eq({ "test" => nil })
end
end
context "key doesn't exist" do
it { is_expected.to eq(0) }
end
end
describe "#key?" do
subject { cache.key?(:example, "test") }
context "key exists" do
before do
cache.write(:example, test_hash)
end
it { is_expected.to be(true) }
end
context "key doesn't exist" do
it { is_expected.to be(false) }
end
end
describe "#read_members" do
subject { cache.read_members(:example, keys) }
let(:keys) { %w(test missing) }
context "all data is cached" do
before do
cache.write(:example, test_hash.merge({ "missing" => false }))
end
it { is_expected.to eq({ "test" => "value", "missing" => "false" }) }
end
context "partial data is cached" do
before do
cache.write(:example, test_hash)
end
it { is_expected.to eq({ "test" => "value", "missing" => nil }) }
end
context "no data is cached" do
it { is_expected.to eq({ "test" => nil, "missing" => nil }) }
end
context "empty keys are passed for some reason" do
let(:keys) { [] }
it "raises an error" do
expect { subject }.to raise_error(Gitlab::RepositoryHashCache::InvalidKeysProvidedError)
end
end
end
describe "#write" do
subject { cache.write(:example, test_hash) }
it { is_expected.to be(true) }
it "actually writes stuff to Redis" do
subject
expect(cache.read_members(:example, ["test"])).to eq(test_hash)
end
end
describe "#fetch_and_add_missing" do
subject do
cache.fetch_and_add_missing(:example, keys) do |missing_keys, hash|
missing_keys.each do |key|
hash[key] = "was_missing"
end
end
end
let(:keys) { %w(test) }
it "records metrics" do
# Here we expect it to receive "test" as a missing key because we
# don't write to the cache before this test
expect(cache).to receive(:record_metrics).with(:example, { "test" => "was_missing" }, ["test"])
subject
end
context "fully cached" do
let(:keys) { %w(test another) }
before do
cache.write(:example, test_hash.merge({ "another" => "not_missing" }))
end
it "returns a hash" do
is_expected.to eq({ "test" => "value", "another" => "not_missing" })
end
it "doesn't write to the cache" do
expect(cache).not_to receive(:write)
subject
end
end
context "partially cached" do
let(:keys) { %w(test missing) }
before do
cache.write(:example, test_hash)
end
it "returns a hash" do
is_expected.to eq({ "test" => "value", "missing" => "was_missing" })
end
it "writes to the cache" do
expect(cache).to receive(:write).with(:example, { "missing" => "was_missing" })
subject
end
end
context "uncached" do
let(:keys) { %w(test missing) }
it "returns a hash" do
is_expected.to eq({ "test" => "was_missing", "missing" => "was_missing" })
end
it "writes to the cache" do
expect(cache).to receive(:write).with(:example, { "test" => "was_missing", "missing" => "was_missing" })
subject
end
end
end
end
......@@ -500,45 +500,62 @@ describe Repository do
let(:branch_names) { %w(test beep boop definitely_merged) }
let(:already_merged) { Set.new(["definitely_merged"]) }
let(:merge_state_hash) do
let(:write_hash) do
{
"test" => false,
"beep" => false,
"boop" => false,
"definitely_merged" => true
"test" => Gitlab::Redis::Boolean.new(false).to_s,
"beep" => Gitlab::Redis::Boolean.new(false).to_s,
"boop" => Gitlab::Redis::Boolean.new(false).to_s,
"definitely_merged" => Gitlab::Redis::Boolean.new(true).to_s
}
end
let_it_be(:cache) do
caching_config_hash = Gitlab::Redis::Cache.params
ActiveSupport::Cache.lookup_store(:redis_cache_store, caching_config_hash)
end
let(:repository_cache) do
Gitlab::RepositoryCache.new(repository, backend: Rails.cache)
let(:read_hash) do
{
"test" => Gitlab::Redis::Boolean.new(false).to_s,
"beep" => Gitlab::Redis::Boolean.new(false).to_s,
"boop" => Gitlab::Redis::Boolean.new(false).to_s,
"definitely_merged" => Gitlab::Redis::Boolean.new(true).to_s
}
end
let(:cache_key) { repository_cache.cache_key(:merged_branch_names) }
let(:cache) { repository.send(:redis_hash_cache) }
let(:cache_key) { cache.cache_key(:merged_branch_names) }
before do
allow(Rails).to receive(:cache) { cache }
allow(repository).to receive(:cache) { repository_cache }
allow(repository.raw_repository).to receive(:merged_branch_names).with(branch_names).and_return(already_merged)
end
it { is_expected.to eq(already_merged) }
it { is_expected.to be_a(Set) }
describe "cache expiry" do
before do
allow(cache).to receive(:delete).with(anything)
end
it "is expired when the branches caches are expired" do
expect(cache).to receive(:delete).with(:merged_branch_names).at_least(:once)
repository.send(:expire_branches_cache)
end
it "is expired when the repository caches are expired" do
expect(cache).to receive(:delete).with(:merged_branch_names).at_least(:once)
repository.send(:expire_all_method_caches)
end
end
context "cache is empty" do
before do
cache.delete(cache_key)
cache.delete(:merged_branch_names)
end
it { is_expected.to eq(already_merged) }
describe "cache values" do
it "writes the values to redis" do
expect(cache).to receive(:write).with(cache_key, merge_state_hash, expires_in: Repository::MERGED_BRANCH_NAMES_CACHE_DURATION)
expect(cache).to receive(:write).with(:merged_branch_names, write_hash)
subject
end
......@@ -546,14 +563,14 @@ describe Repository do
it "matches the supplied hash" do
subject
expect(cache.read(cache_key)).to eq(merge_state_hash)
expect(cache.read_members(:merged_branch_names, branch_names)).to eq(read_hash)
end
end
end
context "cache is not empty" do
before do
cache.write(cache_key, merge_state_hash)
cache.write(:merged_branch_names, write_hash)
end
it { is_expected.to eq(already_merged) }
......@@ -568,8 +585,8 @@ describe Repository do
context "cache is partially complete" do
before do
allow(repository.raw_repository).to receive(:merged_branch_names).with(["boop"]).and_return([])
hash = merge_state_hash.except("boop")
cache.write(cache_key, hash)
hash = write_hash.except("boop")
cache.write(:merged_branch_names, hash)
end
it { is_expected.to eq(already_merged) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment