Fix `Ci::Artifactable#selective_sync_scope` cross-join DBs

Changelog: fixed EE: true

Fix `Ci::Artifactable#selective_sync_scope` cross-join DBs
Changelog: fixed EE: true
6ac86e57 · Douglas Barbosa Alexandre · a014c2b5 · 6ac86e57
Commit 6ac86e57 authored Jul 22, 2021 by Douglas Barbosa Alexandre
Show whitespace changes
Inline Side-by-side

Showing with 17 additions and 6 deletions

ee/app/models/concerns/ee/ci/artifactable.rb ee/app/models/concerns/ee/ci/artifactable.rb +17 -6

No files found.
--- a/ee/app/models/concerns/ee/ci/artifactable.rb
+++ b/ee/app/models/concerns/ee/ci/artifactable.rb
@@ -11,21 +11,32 @@ module EE
        def replicables_for_current_secondary(primary_key_in)
          node = ::Gitlab::Geo.current_node
+          replicables =
            primary_key_in(primary_key_in)
-            .merge(selective_sync_scope(node))
              .merge(object_storage_scope(node))
+          selective_sync_scope(node, replicables)
        end
+        # @return [ActiveRecord::Relation<Ci::{PipelineArtifact|JobArtifact}>] observing object storage settings of the given node
        def object_storage_scope(node)
          return all if node.sync_object_storage?
          with_files_stored_locally
        end
-        def selective_sync_scope(node)
+        # The primary_key_in passed to replicables_for_current_secondary is at most a range of IDs with a maximum of 10_000 records
-          return all unless node.selective_sync?
+        # between them. We can additionally reduce the batch size to 1_000 just for pipeline artifacts and job artifacts if needed.
+        #
+        # @return [ActiveRecord::Relation<Ci::{PipelineArtifact|JobArtifact}>] observing selective sync settings of the given node
+        def selective_sync_scope(node, replicables)
+          return replicables unless node.selective_sync?
+          # Note that we can't do node.projects.ids since it can have millions of records.
+          replicables_project_ids = replicables.distinct.pluck(:project_id)
+          selective_projects_ids  = node.projects.id_in(replicables_project_ids).pluck_primary_key
-          project_id_in(node.projects)
+          replicables.project_id_in(selective_projects_ids)
        end
      end
    end