Commit e15a1c73 authored by Jan Provaznik's avatar Jan Provaznik Committed by charlie ablett

Load epic issue metadata info in batches

If number of epic ids we pass to BulkEpicAggregateLoader reaches a limit
(~200), then postgres uses a different plan for the query which is much
slower.

To avoid this situation, we get this metadata in two phases:
1. get epic ids for epics and its descendants (we use pluck because
this query is recursive and find_each wouldn't help here)
2. we iterate in batches and get metadata for each of batch
parent 1a844cb5
---
title: Added composite index to epic_issues table and improved performance of loading
bigger epic roadmaps
merge_request: 54677
author:
type: performance
# frozen_string_literal: true
class AddEpicIssueCompositeIndex < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_epic_issues_on_epic_id_and_issue_id'
disable_ddl_transaction!
def up
add_concurrent_index :epic_issues, [:epic_id, :issue_id], name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :epic_issues, INDEX_NAME
end
end
546802f93f64e346b066438e78ace5d2dc54de8a5f6234c2d01296a239cfe74c
\ No newline at end of file
......@@ -22121,6 +22121,8 @@ CREATE INDEX index_environments_on_state_and_auto_stop_at ON environments USING
CREATE INDEX index_epic_issues_on_epic_id ON epic_issues USING btree (epic_id);
CREATE INDEX index_epic_issues_on_epic_id_and_issue_id ON epic_issues USING btree (epic_id, issue_id);
CREATE UNIQUE INDEX index_epic_issues_on_issue_id ON epic_issues USING btree (issue_id);
CREATE INDEX index_epic_metrics ON epic_metrics USING btree (epic_id);
......@@ -272,6 +272,20 @@ module EE
def search(query)
fuzzy_search(query, [:title, :description])
end
def ids_for_base_and_decendants(epic_ids)
::Gitlab::ObjectHierarchy.new(self.for_ids(epic_ids)).base_and_descendants.pluck(:id)
end
def issue_metadata_for_epics(epic_ids:, limit:)
records = self.for_ids(epic_ids)
.left_joins(epic_issues: :issue)
.group("epics.id", "epics.iid", "epics.parent_id", "epics.state_id", "issues.state_id")
.select("epics.id, epics.iid, epics.parent_id, epics.state_id AS epic_state_id, issues.state_id AS issues_state_id, COUNT(issues) AS issues_count, SUM(COALESCE(issues.weight, 0)) AS issues_weight_sum")
.limit(limit)
records.map { |record| record.attributes.with_indifferent_access }
end
end
def resource_parent
......
......@@ -7,6 +7,7 @@ module Gitlab
include ::Gitlab::Graphql::Aggregations::Epics::Constants
MAXIMUM_LOADABLE = 100_001
EPIC_BATCH_SIZE = 1000
attr_reader :target_epic_ids, :results
......@@ -17,25 +18,26 @@ module Gitlab
@target_epic_ids = epic_ids
end
# rubocop: disable CodeReuse/ActiveRecord
def execute
return {} unless target_epic_ids
# the list of epics and epic decendants is intentionally loaded
# separately, the reason is that if number of epic_ids is over some
# limit (~200), then postgres uses a slow query plan and first does
# left join of epic_issues with issues which times out
epic_ids = ::Epic.ids_for_base_and_decendants(target_epic_ids)
raise ArgumentError.new("There are too many epics to load. Please select fewer epics or contact your administrator.") if epic_ids.count >= MAXIMUM_LOADABLE
# We do a left outer join in order to capture epics with no issues
# This is so we can aggregate the epic counts for every epic
raw_results = ::Gitlab::ObjectHierarchy.new(Epic.where(id: target_epic_ids)).base_and_descendants
.left_joins(epic_issues: :issue)
.group("issues.state_id", "epics.id", "epics.iid", "epics.parent_id", "epics.state_id")
.select("epics.id, epics.iid, epics.parent_id, epics.state_id AS epic_state_id, issues.state_id AS issues_state_id, COUNT(issues) AS issues_count, SUM(COALESCE(issues.weight, 0)) AS issues_weight_sum")
.limit(MAXIMUM_LOADABLE)
raw_results = raw_results.map { |record| record.attributes.with_indifferent_access }
raise ArgumentError.new("There are too many records to load. Please select fewer epics or contact your administrator.") if raw_results.count == MAXIMUM_LOADABLE
raw_results = []
epic_ids.in_groups_of(EPIC_BATCH_SIZE).each do |epic_batch_ids|
raw_results += ::Epic.issue_metadata_for_epics(epic_ids: epic_ids, limit: MAXIMUM_LOADABLE)
raise ArgumentError.new("There are too many records to load. Please select fewer epics or contact your administrator.") if raw_results.count >= MAXIMUM_LOADABLE
end
@results = raw_results.group_by { |record| record[:id] }
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
end
......
......@@ -88,11 +88,17 @@ RSpec.describe Gitlab::Graphql::Loaders::BulkEpicAggregateLoader do
end
it 'errors when the number of retrieved records exceeds the maximum' do
stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 1)
stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 4)
expect { subject.execute }.to raise_error(ArgumentError, /too many records/)
end
it 'errors when the number of retrieved epics exceeds the maximum' do
stub_const("Gitlab::Graphql::Loaders::BulkEpicAggregateLoader::MAXIMUM_LOADABLE", 1)
expect { subject.execute }.to raise_error(ArgumentError, /too many epics/)
end
context 'testing for a single database query' do
it 'does not repeat database queries for subepics' do
recorder = ActiveRecord::QueryRecorder.new { described_class.new(epic_ids: epic_with_issues.id).execute }
......
......@@ -669,19 +669,58 @@ RSpec.describe Epic do
end
end
describe '.related_issues' do
it 'returns epic issues ordered by relative position' do
epic1 = create(:epic, group: group)
epic2 = create(:epic, group: group)
issue1 = create(:issue, project: project)
issue2 = create(:issue, project: project)
create(:issue, project: project)
create(:epic_issue, epic: epic1, issue: issue1, relative_position: 5)
create(:epic_issue, epic: epic2, issue: issue2, relative_position: 2)
result = described_class.related_issues(ids: [epic1.id, epic2.id])
expect(result.pluck(:id)).to eq [issue2.id, issue1.id]
context 'with existing epics and related issues' do
let_it_be(:epic1) { create(:epic, group: group) }
let_it_be(:epic2) { create(:epic, group: group, parent: epic1) }
let_it_be(:epic3) { create(:epic, group: group, parent: epic2, state: :closed) }
let_it_be(:epic4) { create(:epic, group: group) }
let_it_be(:issue1) { create(:issue, weight: 2) }
let_it_be(:issue2) { create(:issue, weight: 3) }
let_it_be(:issue3) { create(:issue, state: :closed) }
let_it_be(:epic_issue1) { create(:epic_issue, epic: epic2, issue: issue1, relative_position: 5) }
let_it_be(:epic_issue2) { create(:epic_issue, epic: epic2, issue: issue2, relative_position: 2) }
let_it_be(:epic_issue3) { create(:epic_issue, epic: epic3, issue: issue3) }
describe '.related_issues' do
it 'returns epic issues ordered by relative position' do
result = described_class.related_issues(ids: [epic1.id, epic2.id])
expect(result.pluck(:id)).to eq [issue2.id, issue1.id]
end
end
describe '.ids_for_base_and_decendants' do
it 'returns epic ids only for selected epics or its descendant epics' do
create(:epic, group: group)
expect(described_class.ids_for_base_and_decendants([epic1.id, epic4.id]))
.to match_array([epic1.id, epic2.id, epic3.id, epic4.id])
end
end
describe '.issue_metadata_for_epics' do
it 'returns hash containing epic issues count and weight and epic status' do
result = described_class.issue_metadata_for_epics(epic_ids: [epic2.id, epic3.id], limit: 100)
expected = [{
"epic_state_id" => 1,
"id" => epic2.id,
"iid" => epic2.iid,
"issues_count" => 2,
"issues_state_id" => 1,
"issues_weight_sum" => 5,
"parent_id" => epic1.id
}, {
"epic_state_id" => 2,
"id" => epic3.id,
"iid" => epic3.iid,
"issues_count" => 1,
"issues_state_id" => 2,
"issues_weight_sum" => 0,
"parent_id" => epic2.id
}]
expect(result).to match_array(expected)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment