Commit 3649b65e authored by Mike Kozono's avatar Mike Kozono

Geo: Improve performance of LFS object queries

See discussion of various approaches at:
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42423#note_418699307
parent 81c3e33c
...@@ -23,7 +23,7 @@ module EE ...@@ -23,7 +23,7 @@ module EE
def replicables_for_geo_node(primary_key_in, node = ::Gitlab::Geo.current_node) def replicables_for_geo_node(primary_key_in, node = ::Gitlab::Geo.current_node)
local_storage_only = !node.sync_object_storage local_storage_only = !node.sync_object_storage
scope = node.lfs_objects.primary_key_in(primary_key_in) scope = node.lfs_objects(primary_key_in: primary_key_in)
scope = scope.with_files_stored_locally if local_storage_only scope = scope.with_files_stored_locally if local_storage_only
scope scope
end end
......
...@@ -265,23 +265,17 @@ class GeoNode < ApplicationRecord ...@@ -265,23 +265,17 @@ class GeoNode < ApplicationRecord
designs.where(id: project_id).exists? designs.where(id: project_id).exists?
end end
def lfs_objects # @param primary_key_in [Range, LfsObject] arg to pass to primary_key_in scope
return LfsObject.all unless selective_sync? # @return [ActiveRecord::Relation<LfsObject>] scope of LfsObject filtered by selective sync settings and primary key arg
def lfs_objects(primary_key_in:)
query = LfsObjectsProject.project_id_in(projects).select(:lfs_object_id).distinct return LfsObject.primary_key_in(primary_key_in) unless selective_sync?
cte = Gitlab::SQL::CTE.new(:restricted_lfs_objects, query)
lfs_object_table = LfsObject.arel_table ids = LfsObjectsProject.project_id_in(projects)
.where(lfs_object_id: primary_key_in)
inner_join_restricted_lfs_objects = .select(:lfs_object_id)
cte.table .distinct
.join(lfs_object_table, Arel::Nodes::InnerJoin)
.on(cte.table[:lfs_object_id].eq(lfs_object_table[:id])) LfsObject.where(id: ids)
.join_sources
LfsObject
.with(cte.to_arel)
.from(cte.table)
.joins(inner_join_restricted_lfs_objects)
end end
def projects def projects
......
---
title: 'Geo: Improve performance of LFS objects queries'
merge_request: 42423
author:
type: performance
...@@ -835,9 +835,11 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do ...@@ -835,9 +835,11 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5) create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end end
subject(:lfs_objects) { node.lfs_objects(primary_key_in: 1..LfsObject.last.id) }
context 'without selective sync' do context 'without selective sync' do
it 'returns all projects without selective sync' do it 'returns all projects without selective sync' do
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3, lfs_object_4, lfs_object_5]) expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3, lfs_object_4, lfs_object_5])
end end
end end
...@@ -847,14 +849,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do ...@@ -847,14 +849,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
end end
it 'excludes LFS objects that are not in selectively synced projects' do it 'excludes LFS objects that are not in selectively synced projects' do
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3]) expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
end end
it 'excludes LFS objects from fork networks' do it 'excludes LFS objects from fork networks' do
forked_project = create(:project, group: synced_group) forked_project = create(:project, group: synced_group)
create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_1) create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_1)
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3]) expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
end end
end end
...@@ -864,14 +866,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do ...@@ -864,14 +866,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
end end
it 'excludes LFS objects that are not in selectively synced shards' do it 'excludes LFS objects that are not in selectively synced shards' do
expect(node.lfs_objects).to match_array([lfs_object_5]) expect(lfs_objects).to match_array([lfs_object_5])
end end
it 'excludes LFS objects from fork networks' do it 'excludes LFS objects from fork networks' do
forked_project = create(:project, :broken_storage) forked_project = create(:project, :broken_storage)
create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_5) create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_5)
expect(node.lfs_objects).to match_array([lfs_object_5]) expect(lfs_objects).to match_array([lfs_object_5])
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment