Commit 4e2bb4e5 authored by Stan Hu's avatar Stan Hu

Reduce Gitaly calls in PostReceive

This commit reduces I/O load and memory utilization during PostReceive
for the common case when no project hooks or services are set up.

We saw a Gitaly N+1 issue in `CommitDelta` when many tags or branches
are pushed. We can reduce this overhead in the common case because we
observe that most new projects do not have any Web hooks or services,
especially when they are first created. Previously, `BaseHooksService`
unconditionally iterated through the last 20 commits of each ref to
build the `push_data` structure. The `push_data` structured was used in
numerous places:

1. Building the push payload in `EventCreateService`
2. Creating a CI pipeline
3. Executing project Web or system hooks
4. Executing project services
5. As the return value of `BaseHooksService#execute`
6. `BranchHooksService#invalidated_file_types`

We only need to generate the full `push_data` for items 3, 4, and 6.

Item 1: `EventCreateService` only needs the last commit and doesn't
actually need the commit deltas.

Item 2: In addition, `Ci::CreatePipelineService` only needed a subset of
the parameters.

Item 5: The return value of `BaseHooksService#execute` also wasn't being
used anywhere.

Item 6: This is only used when pushing to the default branch, so if
many tags are pushed we can save significant I/O here.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/65878

Fic
parent 6480bd6d
......@@ -1230,6 +1230,14 @@ class Project < ApplicationRecord
end
end
def has_active_hooks?(hooks_scope = :push_hooks)
hooks.hooks_for(hooks_scope).any? || SystemHook.hooks_for(hooks_scope).any?
end
def has_active_services?(hooks_scope = :push_hooks)
services.public_send(hooks_scope).any? # rubocop:disable GitlabSecurity/PublicSend
end
def valid_repo?
repository.exists?
rescue
......
......@@ -19,7 +19,7 @@ module Git
update_remote_mirrors
push_data
success
end
private
......@@ -33,7 +33,7 @@ module Git
end
def limited_commits
commits.last(PROCESS_COMMIT_LIMIT)
@limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT)
end
def commits_count
......@@ -48,21 +48,25 @@ module Git
[]
end
# Push events in the activity feed only show information for the
# last commit.
def create_events
EventCreateService.new.push(project, current_user, push_data)
EventCreateService.new.push(project, current_user, event_push_data)
end
def create_pipelines
return unless params.fetch(:create_pipelines, true)
Ci::CreatePipelineService
.new(project, current_user, push_data)
.new(project, current_user, base_params)
.execute(:push, pipeline_options)
end
def execute_project_hooks
project.execute_hooks(push_data, hook_name)
project.execute_services(push_data, hook_name)
# Creating push_data invokes one CommitDelta RPC per commit. Only
# build this data if we actually need it.
project.execute_hooks(push_data, hook_name) if project.has_active_hooks?(hook_name)
project.execute_services(push_data, hook_name) if project.has_active_services?(hook_name)
end
def enqueue_invalidate_cache
......@@ -73,18 +77,35 @@ module Git
)
end
def push_data
@push_data ||= Gitlab::DataBuilder::Push.build(
project: project,
user: current_user,
def base_params
{
oldrev: params[:oldrev],
newrev: params[:newrev],
ref: params[:ref],
commits: limited_commits,
push_options: params[:push_options] || {}
}
end
def push_data_params(commits:, with_changed_files: true)
base_params.merge(
project: project,
user: current_user,
commits: commits,
message: event_message,
commits_count: commits_count,
push_options: params[:push_options] || {}
with_changed_files: with_changed_files
)
end
def event_push_data
# We only need the last commit for the event push, and we don't
# need the full deltas either.
@event_push_data ||= Gitlab::DataBuilder::Push.build(
push_data_params(commits: commits.last, with_changed_files: false))
end
def push_data
@push_data ||= Gitlab::DataBuilder::Push.build(push_data_params(commits: limited_commits))
# Dependent code may modify the push data, so return a duplicate each time
@push_data.dup
......
---
title: Reduce Gitaly calls in PostReceive
merge_request: 31741
author:
type: performance
......@@ -60,7 +60,8 @@ module Gitlab
# rubocop:disable Metrics/ParameterLists
def build(
project:, user:, ref:, oldrev: nil, newrev: nil,
commits: [], commits_count: nil, message: nil, push_options: {})
commits: [], commits_count: nil, message: nil, push_options: {},
with_changed_files: true)
commits = Array(commits)
......@@ -75,7 +76,7 @@ module Gitlab
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/38259
commit_attrs = Gitlab::GitalyClient.allow_n_plus_1_calls do
commits_limited.map do |commit|
commit.hook_attrs(with_changed_files: true)
commit.hook_attrs(with_changed_files: with_changed_files)
end
end
......
......@@ -3,9 +3,43 @@
require 'spec_helper'
describe Gitlab::DataBuilder::Push do
include RepoHelpers
let(:project) { create(:project, :repository) }
let(:user) { build(:user, public_email: 'public-email@example.com') }
describe '.build' do
let(:sample) { RepoHelpers.sample_compare }
let(:commits) { project.repository.commits_between(sample.commits.first, sample.commits.last) }
let(:subject) do
described_class.build(project: project,
user: user,
ref: sample.target_branch,
commits: commits,
commits_count: commits.length,
message: 'test message',
with_changed_files: with_changed_files)
end
context 'with changed files' do
let(:with_changed_files) { true }
it 'returns commit hook data' do
expect(subject[:project]).to eq(project.hook_attrs)
expect(subject[:commits].first.keys).to include(*%i(added removed modified))
end
end
context 'without changed files' do
let(:with_changed_files) { false }
it 'returns commit hook data without include deltas' do
expect(subject[:project]).to eq(project.hook_attrs)
expect(subject[:commits].first.keys).not_to include(*%i(added removed modified))
end
end
end
describe '.build_sample' do
let(:data) { described_class.build_sample(project, user) }
......
......@@ -4297,6 +4297,39 @@ describe Project do
end
end
describe '#has_active_hooks?' do
set(:project) { create(:project) }
it { expect(project.has_active_hooks?).to be_falsey }
it 'returns true when a matching push hook exists' do
create(:project_hook, push_events: true, project: project)
expect(project.has_active_hooks?(:merge_request_events)).to be_falsey
expect(project.has_active_hooks?).to be_truthy
end
it 'returns true when a matching system hook exists' do
create(:system_hook, push_events: true)
expect(project.has_active_hooks?(:merge_request_events)).to be_falsey
expect(project.has_active_hooks?).to be_truthy
end
end
describe '#has_active_services?' do
set(:project) { create(:project) }
it { expect(project.has_active_services?).to be_falsey }
it 'returns true when a matching service exists' do
create(:custom_issue_tracker_service, push_events: true, merge_requests_events: false, project: project)
expect(project.has_active_services?(:merge_request_hooks)).to be_falsey
expect(project.has_active_services?).to be_truthy
end
end
describe '#badges' do
let(:project_group) { create(:group) }
let(:project) { create(:project, path: 'avatar', namespace: project_group) }
......
......@@ -14,6 +14,78 @@ describe Git::BaseHooksService do
let(:newrev) { "8a2a6eb295bb170b34c24c76c49ed0e9b2eaf34b" } # gitlab-test: git rev-parse refs/tags/v1.1.0
let(:ref) { 'refs/tags/v1.1.0' }
describe '#execute_project_hooks' do
class TestService < described_class
def hook_name
:push_hooks
end
def commits
[]
end
end
let(:project) { create(:project, :repository) }
subject { TestService.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref) }
context '#execute_hooks' do
before do
expect(project).to receive(:has_active_hooks?).and_return(active)
end
context 'active hooks' do
let(:active) { true }
it 'executes the hooks' do
expect(subject).to receive(:push_data).at_least(:once).and_call_original
expect(project).to receive(:execute_hooks)
subject.execute
end
end
context 'inactive hooks' do
let(:active) { false }
it 'does not execute the hooks' do
expect(subject).not_to receive(:push_data)
expect(project).not_to receive(:execute_hooks)
subject.execute
end
end
end
context '#execute_services' do
before do
expect(project).to receive(:has_active_services?).and_return(active)
end
context 'active services' do
let(:active) { true }
it 'executes the services' do
expect(subject).to receive(:push_data).at_least(:once).and_call_original
expect(project).to receive(:execute_services)
subject.execute
end
end
context 'inactive services' do
let(:active) { false }
it 'does not execute the services' do
expect(subject).not_to receive(:push_data)
expect(project).not_to receive(:execute_services)
subject.execute
end
end
end
end
describe 'with remote mirrors' do
class TestService < described_class
def commits
......
......@@ -25,7 +25,7 @@ describe Git::BranchHooksService do
end
describe "Git Push Data" do
subject(:push_data) { service.execute }
subject(:push_data) { service.send(:push_data) }
it 'has expected push data attributes' do
is_expected.to match a_hash_including(
......@@ -109,6 +109,7 @@ describe Git::BranchHooksService do
expect(event.push_event_payload).to be_an_instance_of(PushEventPayload)
expect(event.push_event_payload.commit_from).to eq(oldrev)
expect(event.push_event_payload.commit_to).to eq(newrev)
expect(event.push_event_payload.commit_title).to eq('Change some files')
expect(event.push_event_payload.ref).to eq('master')
expect(event.push_event_payload.commit_count).to eq(1)
end
......@@ -124,6 +125,7 @@ describe Git::BranchHooksService do
expect(event.push_event_payload).to be_an_instance_of(PushEventPayload)
expect(event.push_event_payload.commit_from).to be_nil
expect(event.push_event_payload.commit_to).to eq(newrev)
expect(event.push_event_payload.commit_title).to eq('Initial commit')
expect(event.push_event_payload.ref).to eq('master')
expect(event.push_event_payload.commit_count).to be > 1
end
......
......@@ -78,7 +78,10 @@ describe Git::BranchPushService, services: true do
it "creates a new pipeline" do
expect { subject }.to change { Ci::Pipeline.count }
expect(Ci::Pipeline.last).to be_push
pipeline = Ci::Pipeline.last
expect(pipeline).to be_push
expect(Gitlab::Git::BRANCH_REF_PREFIX + pipeline.ref).to eq(ref)
end
end
......@@ -123,6 +126,10 @@ describe Git::BranchPushService, services: true do
describe "Webhooks" do
context "execute webhooks" do
before do
create(:project_hook, push_events: true, project: project)
end
it "when pushing a branch for the first time" do
expect(project).to receive(:execute_hooks)
expect(project.default_branch).to eq("master")
......
......@@ -26,7 +26,8 @@ describe Git::TagHooksService, :service do
describe 'System hooks' do
it 'Executes system hooks' do
push_data = service.execute
push_data = service.send(:push_data)
expect(project).to receive(:has_active_hooks?).and_return(true)
expect_next_instance_of(SystemHooksService) do |system_hooks_service|
expect(system_hooks_service)
......@@ -40,6 +41,7 @@ describe Git::TagHooksService, :service do
describe "Webhooks" do
it "executes hooks on the project" do
expect(project).to receive(:has_active_hooks?).and_return(true)
expect(project).to receive(:execute_hooks)
service.execute
......@@ -61,7 +63,7 @@ describe Git::TagHooksService, :service do
describe 'Push data' do
shared_examples_for 'tag push data expectations' do
subject(:push_data) { service.execute }
subject(:push_data) { service.send(:push_data) }
it 'has expected push data attributes' do
is_expected.to match a_hash_including(
object_kind: 'tag_push',
......
......@@ -222,6 +222,8 @@ describe PostReceive do
end
it "asks the project to trigger all hooks" do
create(:project_hook, push_events: true, tag_push_events: true, project: project)
create(:custom_issue_tracker_service, push_events: true, merge_requests_events: false, project: project)
allow(Project).to receive(:find_by).and_return(project)
expect(project).to receive(:execute_hooks).twice
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment