Load epic issue metadata info in batches

If the number of epic ids we pass to BulkEpicAggregateLoader reaches a limit (~200), then postgres uses a different plan for the query which is much slower. To avoid this situation, we get this metadata in two phases: 1. get epic ids for the epics and their descendants (we use pluck because this query is recursive and find_each wouldn't help here) 2. iterate over these ids in batches and get metadata for each batch

Load epic issue metadata info in batches
If the number of epic ids we pass to BulkEpicAggregateLoader reaches a limit (~200), then postgres uses a different plan for the query which is much slower. To avoid this situation, we get this metadata in two phases: 1. get epic ids for the epics and their descendants (we use pluck because this query is recursive and find_each wouldn't help here) 2. iterate over these ids in batches and get metadata for each batch
e15a1c73 · Jan Provaznik · charlie ablett · 1a844cb5 · e15a1c73 · e15a1c73
Commit e15a1c73 authored Feb 23, 2021 by Jan Provaznik Committed by charlie ablett Feb 23, 2021
8 changed files
--- a/changelogs/unreleased/epic_count_query.yml
+++ b/changelogs/unreleased/epic_count_query.yml
+---
+title: Added composite index to epic_issues table and improved performance of loading
+  bigger epic roadmaps
+merge_request: 54677
+author:
+type: performance
--- a/db/migrate/20210219111040_add_epic_issue_composite_index.rb
+++ b/db/migrate/20210219111040_add_epic_issue_composite_index.rb
+# frozen_string_literal: true
+
+class AddEpicIssueCompositeIndex < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+  INDEX_NAME = 'index_epic_issues_on_epic_id_and_issue_id'
+
+  disable_ddl_transaction!
+
+  def up
+    add_concurrent_index :epic_issues, [:epic_id, :issue_id], name: INDEX_NAME
+  end
+
+  def down
+    remove_concurrent_index_by_name :epic_issues, INDEX_NAME
+  end
+end
--- a/db/schema_migrations/20210219111040
+++ b/db/schema_migrations/20210219111040
+546802f93f64e346b066438e78ace5d2dc54de8a5f6234c2d01296a239cfe74c
\ No newline at end of file
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -22121,6 +22121,8 @@ CREATE INDEX index_environments_on_state_and_auto_stop_at ON environments USING

 CREATE INDEX index_epic_issues_on_epic_id ON epic_issues USING btree (epic_id);

+CREATE INDEX index_epic_issues_on_epic_id_and_issue_id ON epic_issues USING btree (epic_id, issue_id);
+
 CREATE UNIQUE INDEX index_epic_issues_on_issue_id ON epic_issues USING btree (issue_id);

 CREATE INDEX index_epic_metrics ON epic_metrics USING btree (epic_id);
--- a/ee/app/models/ee/epic.rb
+++ b/ee/app/models/ee/epic.rb
@@ -272,6 +272,20 @@ module EE
      def search(query)
        fuzzy_search(query, [:title, :description])
      end
+
+      def ids_for_base_and_decendants(epic_ids)
+        ::Gitlab::ObjectHierarchy.new(self.for_ids(epic_ids)).base_and_descendants.pluck(:id)
+      end
+
+      def issue_metadata_for_epics(epic_ids:, limit:)
+        records = self.for_ids(epic_ids)
+          .left_joins(epic_issues: :issue)
+          .group("epics.id", "epics.iid", "epics.parent_id", "epics.state_id", "issues.state_id")
+          .select("epics.id, epics.iid, epics.parent_id, epics.state_id AS epic_state_id, issues.state_id AS issues_state_id, COUNT(issues) AS issues_count, SUM(COALESCE(issues.weight, 0)) AS issues_weight_sum")
+          .limit(limit)
+
+        records.map { |record| record.attributes.with_indifferent_access }
+      end
    end

    def resource_parent

--- a/ee/lib/gitlab/graphql/loaders/bulk_epic_aggregate_loader.rb
+++ b/ee/lib/gitlab/graphql/loaders/bulk_epic_aggregate_loader.rb
@@ -7,6 +7,7 @@ module Gitlab
        include ::Gitlab::Graphql::Aggregations::Epics::Constants

        MAXIMUM_LOADABLE = 100_001
+        EPIC_BATCH_SIZE = 1000

        attr_reader :target_epic_ids, :results

@@ -17,25 +18,26 @@ module Gitlab
          @target_epic_ids = epic_ids
        end

-        # rubocop: disable CodeReuse/ActiveRecord
        def execute
          return {} unless target_epic_ids

+          # the list of epics and epic decendants is intentionally loaded
+          # separately, the reason is that if number of epic_ids is over some
+          # limit (~200), then postgres uses a slow query plan and first does
+          # left join of epic_issues with issues which times out
+          epic_ids = ::Epic.ids_for_base_and_decendants(target_epic_ids)
+          raise ArgumentError.new("There are too many epics to load. Please select fewer epics or contact your administrator.") if epic_ids.count >= MAXIMUM_LOADABLE
+
          # We do a left outer join in order to capture epics with no issues
          # This is so we can aggregate the epic counts for every epic
-          raw_results = ::Gitlab::ObjectHierarchy.new(Epic.where(id: target_epic_ids)).base_and_descendants
-            .left_joins(epic_issues: :issue)
-            .group("issues.state_id", "epics.id", "epics.iid", "epics.parent_id", "epics.state_id")
-            .select("epics.id, epics.iid, epics.parent_id, epics.state_id AS epic_state_id, issues.state_id AS issues_state_id, COUNT(issues) AS issues_count, SUM(COALESCE(issues.weight, 0)) AS issues_weight_sum")
-            .limit(MAXIMUM_LOADABLE)
-
-          raw_results = raw_results.map { |record| record.attributes.with_indifferent_access }
-
-          raise ArgumentError.new("There are too many records to load. Please select fewer epics or contact your administrator.") if raw_results.count == MAXIMUM_LOADABLE
+          raw_results = []
+          epic_ids.in_groups_of(EPIC_BATCH_SIZE).each do |epic_batch_ids|
+            raw_results += ::Epic.issue_metadata_for_epics(epic_ids: epic_ids, limit: MAXIMUM_LOADABLE)
+            raise ArgumentError.new("There are too many records to load. Please select fewer epics or contact your administrator.") if raw_results.count >= MAXIMUM_LOADABLE
+          end

          @results = raw_results.group_by { |record| record[:id] }
        end
-        # rubocop: enable CodeReuse/ActiveRecord
      end
    end
  end

--- a/ee/spec/lib/gitlab/graphql/loaders/bulk_epic_aggregate_loader_spec.rb
+++ b/ee/spec/lib/gitlab/graphql/loaders/bulk_epic_aggregate_loader_spec.rb
@@ -88,11 +88,17 @@ RSpec.describe Gitlab::Graphql::Loaders::BulkEpicAggregateLoader do
    end

    it 'errors when the number of retrieved records exceeds the maximum' do
-      stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 1)
+      stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 4)

      expect { subject.execute }.to raise_error(ArgumentError, /too many records/)
    end

+    it 'errors when the number of retrieved epics exceeds the maximum' do
+      stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 1)
+
+      expect { subject.execute }.to raise_error(ArgumentError, /too many epics/)
+    end
+
    context 'testing for a single database query' do
      it 'does not repeat database queries for subepics' do
        recorder = ActiveRecord::QueryRecorder.new { described_class.new(epic_ids: epic_with_issues.id).execute }

--- a/ee/spec/models/epic_spec.rb
+++ b/ee/spec/models/epic_spec.rb
@@ -669,19 +669,58 @@ RSpec.describe Epic do
    end
  end

-  describe '.related_issues' do
-    it 'returns epic issues ordered by relative position' do
-      epic1 = create(:epic, group: group)
-      epic2 = create(:epic, group: group)
-      issue1 = create(:issue, project: project)
-      issue2 = create(:issue, project: project)
-      create(:issue, project: project)
-      create(:epic_issue, epic: epic1, issue: issue1, relative_position: 5)
-      create(:epic_issue, epic: epic2, issue: issue2, relative_position: 2)
-
-      result = described_class.related_issues(ids: [epic1.id, epic2.id])
-
-      expect(result.pluck(:id)).to eq [issue2.id, issue1.id]
+  context 'with existing epics and related issues' do
+    let_it_be(:epic1) { create(:epic, group: group) }
+    let_it_be(:epic2) { create(:epic, group: group, parent: epic1) }
+    let_it_be(:epic3) { create(:epic, group: group, parent: epic2, state: :closed) }
+    let_it_be(:epic4) { create(:epic, group: group) }
+    let_it_be(:issue1) { create(:issue, weight: 2) }
+    let_it_be(:issue2) { create(:issue, weight: 3) }
+    let_it_be(:issue3) { create(:issue, state: :closed) }
+    let_it_be(:epic_issue1) { create(:epic_issue, epic: epic2, issue: issue1, relative_position: 5) }
+    let_it_be(:epic_issue2) { create(:epic_issue, epic: epic2, issue: issue2, relative_position: 2) }
+    let_it_be(:epic_issue3) { create(:epic_issue, epic: epic3, issue: issue3) }
+
+    describe '.related_issues' do
+      it 'returns epic issues ordered by relative position' do
+        result = described_class.related_issues(ids: [epic1.id, epic2.id])
+
+        expect(result.pluck(:id)).to eq [issue2.id, issue1.id]
+      end
+    end
+
+    describe '.ids_for_base_and_decendants' do
+      it 'returns epic ids only for selected epics or its descendant epics' do
+        create(:epic, group: group)
+
+        expect(described_class.ids_for_base_and_decendants([epic1.id, epic4.id]))
+          .to match_array([epic1.id, epic2.id, epic3.id, epic4.id])
+      end
+    end
+
+    describe '.issue_metadata_for_epics' do
+      it 'returns hash containing epic issues count and weight and epic status' do
+        result = described_class.issue_metadata_for_epics(epic_ids: [epic2.id, epic3.id], limit: 100)
+
+        expected = [{
+          "epic_state_id" => 1,
+          "id" => epic2.id,
+          "iid" => epic2.iid,
+          "issues_count" => 2,
+          "issues_state_id" => 1,
+          "issues_weight_sum" => 5,
+          "parent_id" => epic1.id
+        }, {
+          "epic_state_id" => 2,
+          "id" => epic3.id,
+          "iid" => epic3.iid,
+          "issues_count" => 1,
+          "issues_state_id" => 2,
+          "issues_weight_sum" => 0,
+          "parent_id" => epic2.id
+        }]
+        expect(result).to match_array(expected)
+      end
    end
  end