Commit 2430c28f authored by Erick Bajao

Split backfill query to avoid cross joins

parent e3a99a1e
......@@ -6,6 +6,7 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1]
MIGRATION = 'BackfillProjectsWithCoverage'
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 10_000
SUB_BATCH_SIZE = 100
disable_ddl_transaction!
......@@ -20,7 +21,8 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1]
CiDailyBuildGroupReportResult,
MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE
batch_size: BATCH_SIZE,
other_job_arguments: [SUB_BATCH_SIZE]
)
end
......
# frozen_string_literal: true
# Backfill project_ci_feature_usages for a range of projects with coverage
class Gitlab::BackgroundMigration::BackfillProjectsWithCoverage
COVERAGE_ENUM_VALUE = 1
module Gitlab
module BackgroundMigration
# Backfill project_ci_feature_usages for a range of projects with coverage
class BackfillProjectsWithCoverage
COVERAGE_ENUM_VALUE = 1
INSERT_DELAY_SECONDS = 0.1
def perform(start_id, end_id)
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_ci_feature_usages (project_id, feature, default_branch)
SELECT DISTINCT project_id, #{COVERAGE_ENUM_VALUE} as feature, default_branch
FROM ci_daily_build_group_report_results
WHERE id BETWEEN #{start_id} AND #{end_id}
ON CONFLICT (project_id, feature, default_branch) DO NOTHING;
SQL
# Backfills project_ci_feature_usages from ci_daily_build_group_report_results.
#
# Reads the distinct (project_id, default_branch) pairs of the report rows
# whose id lies between start_id and end_id, then inserts them tagged with
# COVERAGE_ENUM_VALUE in groups of at most sub_batch_size rows, sleeping
# INSERT_DELAY_SECONDS after each insert. Pairs that already exist are left
# untouched by the ON CONFLICT ... DO NOTHING clause.
def perform(start_id, end_id, sub_batch_size)
  select_sql = <<~SQL
    SELECT DISTINCT project_id, default_branch
    FROM ci_daily_build_group_report_results
    WHERE id BETWEEN #{start_id} AND #{end_id}
  SQL
  pairs = ActiveRecord::Base.connection.execute(select_sql).to_a

  # `false` keeps the last group short instead of padding it with nils.
  pairs.in_groups_of(sub_batch_size, false) do |slice|
    insert_sql = <<~SQL
      INSERT INTO project_ci_feature_usages (project_id, feature, default_branch) VALUES
      #{build_values(slice)}
      ON CONFLICT (project_id, feature, default_branch) DO NOTHING;
    SQL
    ActiveRecord::Base.connection.execute(insert_sql)
    sleep(INSERT_DELAY_SECONDS)
  end
end
private
# Renders each row of +batch+ as a "(project_id, feature, default_branch)"
# SQL tuple, with COVERAGE_ENUM_VALUE as the feature, and joins the tuples
# with ", " so the result can follow a VALUES keyword. Rows are read through
# their 'project_id' and 'default_branch' string keys.
def build_values(batch)
  tuples = batch.map do |row|
    project_id = row['project_id']
    default_branch = row['default_branch']
    "(#{project_id}, #{COVERAGE_ENUM_VALUE}, #{default_branch})"
  end

  tuples.join(', ')
end
end
end
end
......@@ -66,10 +66,12 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema
default_branch: false,
group_id: group.id
)
stub_const("#{described_class}::INSERT_DELAY_SECONDS", 0)
end
it 'creates entries per project and default_branch combination in the given range', :aggregate_failures do
subject.perform(1, 4)
subject.perform(1, 4, 2)
entries = project_ci_feature_usages.order('project_id ASC, default_branch DESC')
......@@ -81,11 +83,11 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema
context 'when an entry for the project and default branch combination already exists' do
before do
subject.perform(1, 4)
subject.perform(1, 4, 2)
end
it 'does not create a new entry' do
expect { subject.perform(1, 4) }.not_to change { project_ci_feature_usages.count }
expect { subject.perform(1, 4, 2) }.not_to change { project_ci_feature_usages.count }
end
end
end
......
......@@ -17,6 +17,7 @@ RSpec.describe BackfillProjectsWithCoverage do
describe '#up' do
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
stub_const("#{described_class}::SUB_BATCH_SIZE", 1)
ci_daily_build_group_report_results.create!(
id: 1,
......@@ -60,8 +61,8 @@ RSpec.describe BackfillProjectsWithCoverage do
freeze_time do
migrate!
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, 1, 2)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, 3, 3)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, 1, 2, 1)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, 3, 3, 1)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment