Commit ffb9b3ef authored by Yorick Peterse's avatar Yorick Peterse

Refactor cache refreshing/expiring

This refactors repository caching so it's possible to selectively
refresh certain caches, instead of just expiring and refreshing
everything.

To allow this the various methods that were cached (e.g. "tag_count" and
"readme") use a similar pattern that makes expiring and refreshing
their data much easier.

In this new setup caches are refreshed as follows:

1. After a commit (but before running ProjectCacheWorker) we expire some
   basic caches such as the commit count and repository size.

2. ProjectCacheWorker will recalculate the commit count, repository
   size, then refresh a specific set of caches based on the list of
   files changed in a push payload.

This requires a bunch of changes to the various methods that may be
cached. For one, data should not be cached if a branch used or the
entire repository does not exist. To prevent all these methods from
handling this manually this is taken care of in
Repository#cache_method_output. Some methods still manually check for
the existence of a repository but this result is also cached.

With selective flushing implemented ProjectCacheWorker no longer uses an
exclusive lease for all of its work. Instead this worker only uses a
lease to limit the number of times the repository size is updated as
this is a fairly expensive operation.
parent 6f393877
......@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base
"refs/heads/#{branch}",
force: true)
repository.copy_gitattributes(branch)
repository.expire_avatar_cache(branch)
repository.expire_avatar_cache
reload_default_branch
end
......
This diff is collapsed.
......@@ -18,7 +18,7 @@ class GitPushService < BaseService
#
def execute
@project.repository.after_create if @project.empty_repo?
@project.repository.after_push_commit(branch_name, params[:newrev])
@project.repository.after_push_commit(branch_name)
if push_remove_branch?
@project.repository.after_remove_branch
......@@ -51,12 +51,32 @@ class GitPushService < BaseService
execute_related_hooks
perform_housekeeping
update_caches
end
def update_gitattributes
@project.repository.copy_gitattributes(params[:ref])
end
def update_caches
if is_default_branch?
paths = Set.new
@push_commits.each do |commit|
commit.raw_diffs(deltas_only: true).each do |diff|
paths << diff.new_path
end
end
types = Gitlab::FileDetector.types_in_paths(paths.to_a)
else
types = []
end
ProjectCacheWorker.perform_async(@project.id, types)
end
protected
def execute_related_hooks
......@@ -70,7 +90,6 @@ class GitPushService < BaseService
@project.execute_hooks(build_push_data.dup, :push_hooks)
@project.execute_services(build_push_data.dup, :push_hooks)
Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute
ProjectCacheWorker.perform_async(@project.id)
if push_remove_branch?
AfterBranchDeleteService
......
# Worker for updating any project specific caches.
#
# This worker runs at most once every 15 minutes per project. This is to ensure
# that multiple instances of jobs for this worker don't hammer the underlying
# storage engine as much.
class ProjectCacheWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
LEASE_TIMEOUT = 15.minutes.to_i
def self.lease_for(project_id)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}", timeout: LEASE_TIMEOUT)
end
# project_id - The ID of the project for which to flush the cache.
# refresh - An Array containing extra types of data to refresh such as
# `:readme` to flush the README and `:changelog` to flush the
# CHANGELOG.
def perform(project_id, refresh = [])
project = Project.find_by(id: project_id)
# Overwrite Sidekiq's implementation so we only schedule when actually needed.
def self.perform_async(project_id)
# If a lease for this project is still being held there's no point in
# scheduling a new job.
super unless lease_for(project_id).exists?
end
return unless project && project.repository.exists?
def perform(project_id)
if try_obtain_lease_for(project_id)
Rails.logger.
info("Obtained ProjectCacheWorker lease for project #{project_id}")
else
Rails.logger.
info("Could not obtain ProjectCacheWorker lease for project #{project_id}")
return
end
update_repository_size(project)
project.update_commit_count
update_caches(project_id)
project.repository.refresh_method_caches(refresh.map(&:to_sym))
end
def update_caches(project_id)
project = Project.find(project_id)
def update_repository_size(project)
return unless try_obtain_lease_for(project.id, :update_repository_size)
return unless project.repository.exists?
Rails.logger.info("Updating repository size for project #{project.id}")
project.update_repository_size
project.update_commit_count
if project.repository.root_ref
project.repository.build_cache
end
end
def try_obtain_lease_for(project_id)
self.class.lease_for(project_id).try_obtain
private
def try_obtain_lease_for(project_id, section)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}:#{section}", timeout: LEASE_TIMEOUT).
try_obtain
end
end
......@@ -1572,7 +1572,7 @@ describe Project, models: true do
end
it 'expires the avatar cache' do
expect(project.repository).to receive(:expire_avatar_cache).with(project.default_branch)
expect(project.repository).to receive(:expire_avatar_cache)
project.change_head(project.default_branch)
end
......
This diff is collapsed.
......@@ -14,7 +14,7 @@ describe API::API, api: true do
describe "GET /projects/:id/repository/branches" do
it "returns an array of project branches" do
project.repository.expire_cache
project.repository.expire_all_method_caches
get api("/projects/#{project.id}/repository/branches", user)
expect(response).to have_http_status(200)
......
......@@ -27,27 +27,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy }
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes the visible content cache' do
expect(project.repository).to receive(:expire_has_visible_content_cache)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
it 'calls the after_create_branch hook' do
expect(project.repository).to receive(:after_create_branch)
subject
end
......@@ -56,21 +43,8 @@ describe GitPushService, services: true do
context 'existing branch' do
it { is_expected.to be_truthy }
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
subject
end
it 'does not flush the branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
subject
end
it 'does not flush the branch count cache' do
expect(project.repository).not_to receive(:expire_branch_count_cache)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
......@@ -81,27 +55,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy }
it 'flushes the visible content cache' do
expect(project.repository).to receive(:expire_has_visible_content_cache)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
it 'calls the after_remove_branch hook' do
expect(project.repository).to receive(:after_remove_branch)
subject
end
......@@ -598,6 +559,51 @@ describe GitPushService, services: true do
end
end
describe '#update_caches' do
let(:service) do
described_class.new(project,
user,
oldrev: sample_commit.parent_id,
newrev: sample_commit.id,
ref: 'refs/heads/master')
end
context 'on the default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(true)
end
it 'flushes the caches of any special files that have been changed' do
commit = double(:commit)
diff = double(:diff, new_path: 'README.md')
expect(commit).to receive(:raw_diffs).with(deltas_only: true).
and_return([diff])
service.push_commits = [commit]
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, %i(readme))
service.update_caches
end
end
context 'on a non-default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(false)
end
it 'does not flush any conditional caches' do
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, []).
and_call_original
service.update_caches
end
end
end
def execute_service(project, user, oldrev, newrev, ref)
service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref )
service.execute
......
......@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do
end
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache)
expect(project.repository).to receive(:before_push_tag)
subject
end
......@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do
subject
end
it 'flushes the tag count cache' do
expect(project.repository).to receive(:expire_tag_count_cache)
subject
end
end
describe "Git Tag Push Data" do
......
......@@ -2,62 +2,78 @@ require 'spec_helper'
describe ProjectCacheWorker do
let(:project) { create(:project) }
let(:worker) { described_class.new }
subject { described_class.new }
describe '.perform_async' do
it 'schedules the job when no lease exists' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?).
and_return(false)
describe '#perform' do
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
and_return(true)
end
expect_any_instance_of(described_class).to receive(:perform)
context 'with a non-existing project' do
it 'does nothing' do
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id)
worker.perform(-1)
end
end
it 'does not schedule the job when a lease exists' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?).
and_return(true)
context 'with an existing project without a repository' do
it 'does nothing' do
allow_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(described_class).not_to receive(:perform)
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id)
worker.perform(project.id)
end
end
end
describe '#perform' do
context 'when an exclusive lease can be obtained' do
before do
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
and_return(true)
end
context 'with an existing project' do
it 'updates the repository size' do
expect(worker).to receive(:update_repository_size).and_call_original
it 'updates project cache data' do
expect_any_instance_of(Repository).to receive(:size)
expect_any_instance_of(Repository).to receive(:commit_count)
worker.perform(project.id)
end
expect_any_instance_of(Project).to receive(:update_repository_size)
expect_any_instance_of(Project).to receive(:update_commit_count)
it 'updates the commit count' do
expect_any_instance_of(Project).to receive(:update_commit_count).
and_call_original
subject.perform(project.id)
worker.perform(project.id)
end
it 'handles missing repository data' do
expect_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(Repository).not_to receive(:size)
it 'refreshes the method caches' do
expect_any_instance_of(Repository).to receive(:refresh_method_caches).
with(%i(readme)).
and_call_original
subject.perform(project.id)
worker.perform(project.id, %i(readme))
end
end
end
context 'when an exclusive lease can not be obtained' do
it 'does nothing' do
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
describe '#update_repository_size' do
context 'when a lease could not be obtained' do
it 'does not update the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(false)
expect(subject).not_to receive(:update_caches)
expect(project).not_to receive(:update_repository_size)
worker.update_repository_size(project)
end
end
context 'when a lease could be obtained' do
it 'updates the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(true)
expect(project).to receive(:update_repository_size).and_call_original
subject.perform(project.id)
worker.update_repository_size(project)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment