Fix `Ci::Artifactable#selective_sync_scope` cross-join DBs

Changelog: fixed
EE: true
parent a014c2b5
...@@ -11,21 +11,32 @@ module EE ...@@ -11,21 +11,32 @@ module EE
def replicables_for_current_secondary(primary_key_in) def replicables_for_current_secondary(primary_key_in)
node = ::Gitlab::Geo.current_node node = ::Gitlab::Geo.current_node
replicables =
primary_key_in(primary_key_in) primary_key_in(primary_key_in)
.merge(selective_sync_scope(node))
.merge(object_storage_scope(node)) .merge(object_storage_scope(node))
selective_sync_scope(node, replicables)
end end
# @return [ActiveRecord::Relation<Ci::{Pipeline|Job}Artifact>] observing object storage settings of the given node
def object_storage_scope(node) def object_storage_scope(node)
return all if node.sync_object_storage? return all if node.sync_object_storage?
with_files_stored_locally with_files_stored_locally
end end
def selective_sync_scope(node) # The primary_key_in in replicables_for_current_secondary method is at most a range of IDs with a maximum of 10_000 records
return all unless node.selective_sync? # between them. We can additionally reduce the batch size to 1_000 just for pipeline artifacts and job artifacts if needed.
#
# @return [ActiveRecord::Relation<Ci::{Pipeline|Job}Artifact>] observing selective sync settings of the given node
def selective_sync_scope(node, replicables)
return replicables unless node.selective_sync?
# Note that we can't do node.projects.ids since it can have millions of records.
replicables_project_ids = replicables.distinct.pluck(:project_id)
selective_projects_ids = node.projects.id_in(replicables_project_ids).pluck_primary_key
project_id_in(node.projects) replicables.project_id_in(selective_projects_ids)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment