Commit 11f8136d authored by Stan Hu's avatar Stan Hu

Eliminate Gitaly N+1 queries loading submodules

When a directory contains many submodules, we make a number of Gitaly
N+1 queries:

1. One TreeEntry RPC to look up each submodule
2. One TreeEntry RPC to read the `.gitmodules` file

The first item is redundant because we've already determined the tree
commit is a submodule. We can eliminate this query by skipping it
altogether.

The second item can be optimized by caching the contents of
`.gitmodules` once per directory. We already have a mechanism for
caching this in `SubmoduleLinks`. Now we just need to use it.

This significantly reduces Gitaly RPCs and closes two issues
with one stone:

1. https://gitlab.com/gitlab-org/gitlab/issues/35333
2. https://gitlab.com/gitlab-org/gitlab/issues/194380
parent ad1f7390
...@@ -7,9 +7,7 @@ module SubmoduleHelper ...@@ -7,9 +7,7 @@ module SubmoduleHelper
# links to files listing for submodule if submodule is a project on this server # links to files listing for submodule if submodule is a project on this server
def submodule_links(submodule_item, ref = nil, repository = @repository) def submodule_links(submodule_item, ref = nil, repository = @repository)
url = repository.submodule_url_for(ref, submodule_item.path) repository.submodule_links.for(submodule_item, ref)
submodule_links_for_url(submodule_item.id, url, repository)
end end
def submodule_links_for_url(submodule_item_id, url, repository) def submodule_links_for_url(submodule_item_id, url, repository)
......
...@@ -1094,6 +1094,10 @@ class Repository ...@@ -1094,6 +1094,10 @@ class Repository
message: message) message: message)
end end
def submodule_links
@submodule_links ||= ::Gitlab::SubmoduleLinks.new(self)
end
def update_submodule(user, submodule, commit_sha, message:, branch:) def update_submodule(user, submodule, commit_sha, message:, branch:)
with_cache_hooks do with_cache_hooks do
raw.update_submodule( raw.update_submodule(
......
---
title: Eliminate Gitaly N+1 queries loading submodules
merge_request: 23292
author:
type: performance
...@@ -8,6 +8,11 @@ describe SubmoduleHelper do ...@@ -8,6 +8,11 @@ describe SubmoduleHelper do
let(:submodule_item) { double(id: 'hash', path: 'rack') } let(:submodule_item) { double(id: 'hash', path: 'rack') }
let(:config) { Gitlab.config.gitlab } let(:config) { Gitlab.config.gitlab }
let(:repo) { double } let(:repo) { double }
let(:submodules) { Gitlab::SubmoduleLinks.new(repo) }
before do
allow(repo).to receive(:submodule_links).and_return(submodules)
end
shared_examples 'submodule_links' do shared_examples 'submodule_links' do
context 'submodule on self' do context 'submodule on self' do
...@@ -163,7 +168,7 @@ describe SubmoduleHelper do ...@@ -163,7 +168,7 @@ describe SubmoduleHelper do
let(:repo) { double(:repo, project: project) } let(:repo) { double(:repo, project: project) }
def expect_relative_link_to_resolve_to(relative_path, expected_path) def expect_relative_link_to_resolve_to(relative_path, expected_path)
allow(repo).to receive(:submodule_url_for).and_return(relative_path) stub_url(relative_path)
result = subject result = subject
expect(result).to eq([expected_path, "#{expected_path}/tree/#{submodule_item.id}"]) expect(result).to eq([expected_path, "#{expected_path}/tree/#{submodule_item.id}"])
...@@ -183,7 +188,7 @@ describe SubmoduleHelper do ...@@ -183,7 +188,7 @@ describe SubmoduleHelper do
context 'repo path resolves to be located at root (namespace absent)' do context 'repo path resolves to be located at root (namespace absent)' do
it 'returns nil' do it 'returns nil' do
allow(repo).to receive(:submodule_url_for).and_return('../../test.git') stub_url('../../test.git')
result = subject result = subject
...@@ -193,7 +198,7 @@ describe SubmoduleHelper do ...@@ -193,7 +198,7 @@ describe SubmoduleHelper do
context 'repo path resolves to be located underneath current project path' do context 'repo path resolves to be located underneath current project path' do
it 'returns nil because it is not possible to have repo nested under another repo' do it 'returns nil because it is not possible to have repo nested under another repo' do
allow(repo).to receive(:submodule_url_for).and_return('./test.git') stub_url('./test.git')
result = subject result = subject
...@@ -263,6 +268,7 @@ describe SubmoduleHelper do ...@@ -263,6 +268,7 @@ describe SubmoduleHelper do
end end
def stub_url(url) def stub_url(url)
allow(submodules).to receive(:submodule_url_for).and_return(url)
allow(repo).to receive(:submodule_url_for).and_return(url) allow(repo).to receive(:submodule_url_for).and_return(url)
end end
end end
...@@ -2714,4 +2714,10 @@ describe Repository do ...@@ -2714,4 +2714,10 @@ describe Repository do
.to change { Gitlab::GitalyClient.get_request_count }.by(1) .to change { Gitlab::GitalyClient.get_request_count }.by(1)
end end
end end
describe '#submodule_links' do
it 'returns an instance of Gitlab::SubmoduleLinks' do
expect(repository.submodule_links).to be_a(Gitlab::SubmoduleLinks)
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment