Commit 882bf1e9 authored by drew cimino, committed by Adam Hegyi

Add index for querying ci_job_artifacts

Add a temporary index for querying job trace records whose expire_at
matches one of seven specific timestamps, so that we can update that
column for those records on gitlab.com
parent f4cd8c7c
# frozen_string_literal: true
# Adds a temporary partial index on ci_job_artifacts(id), restricted to
# rows with file_type = 3 whose expire_at equals one of seven fixed
# timestamps.
class IndexJobArtifactsOnTraceTypeAndExpireAt < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  INDEX_NAME = 'tmp_index_ci_job_artifacts_on_id_where_trace_and_expire_at'

  # Comma-separated list of quoted SQL timestamp literals (midnight on each
  # listed day), interpolated into the index predicate below.
  TIMESTAMPS = %w[
    2021-04-22 2021-05-22 2021-06-22
    2022-01-22 2022-02-22 2022-03-22 2022-04-22
  ].map { |day| "'#{day} 00:00:00'" }.join(', ').freeze

  # Creates the partial index using the concurrent index helper.
  def up
    add_concurrent_index(
      :ci_job_artifacts,
      :id,
      where: "file_type = 3 AND expire_at IN (#{TIMESTAMPS})",
      name: INDEX_NAME
    )
  end

  # Drops the index by name.
  def down
    remove_concurrent_index_by_name(:ci_job_artifacts, INDEX_NAME)
  end
end
# frozen_string_literal: true
# Schedules RemoveAllTraceExpirationDates background migration jobs over the
# ci_job_artifacts rows that are traces (file_type = 3) and whose expire_at
# is one of the targeted timestamps. Does nothing unless Gitlab.com? is true.
class ScheduleTraceExpiryRemoval < Gitlab::Database::Migration[1.0]
  MIGRATION = 'RemoveAllTraceExpirationDates'
  BATCH_SIZE = 100_000
  DELAY_INTERVAL = 4.minutes

  disable_ddl_transaction!

  # Stubbed class to connect to the CI database
  # connects_to has to be called in abstract classes.
  class MultiDbAdaptableClass < ActiveRecord::Base
    self.abstract_class = true

    if Gitlab::Database.has_config?(:ci)
      connects_to database: { writing: :ci, reading: :ci }
    end
  end

  # Stubbed class to access the ci_job_artifacts table
  class JobArtifact < MultiDbAdaptableClass
    include EachBatch

    self.table_name = 'ci_job_artifacts'

    # Built with Time.utc so each value is midnight UTC regardless of the
    # application's configured Time.zone (Date#midnight would use that zone
    # before converting). These are the same instants as the `+00` literals
    # in the tmp_index_ci_job_artifacts_on_id_where_trace_and_expire_at
    # index predicate.
    TARGET_TIMESTAMPS = [
      Time.utc(2021, 4, 22),
      Time.utc(2021, 5, 22),
      Time.utc(2021, 6, 22),
      Time.utc(2022, 1, 22),
      Time.utc(2022, 2, 22),
      Time.utc(2022, 3, 22),
      Time.utc(2022, 4, 22)
    ].freeze

    scope :in_targeted_timestamps, -> { where(expire_at: TARGET_TIMESTAMPS) }
    scope :traces, -> { where(file_type: 3) }
  end

  # Queues one delayed job per BATCH_SIZE-sized id range, spaced by
  # DELAY_INTERVAL, for the targeted trace rows.
  def up
    return unless Gitlab.com?

    queue_background_migration_jobs_by_range_at_intervals(
      JobArtifact.traces.in_targeted_timestamps,
      MIGRATION,
      DELAY_INTERVAL,
      batch_size: BATCH_SIZE,
      track_jobs: true
    )
  end

  def down
    # no-op
  end
end
08326048e15f368f09bc10ebf5bee3e77e8b43813f66c19d24731497ca6a8485
\ No newline at end of file
59fe701bcaa102b7e0c1496198fa4aeea6b2e59132c951d1c9d54562c5e3900e
\ No newline at end of file
...@@ -29522,6 +29522,8 @@ CREATE INDEX tmp_gitlab_subscriptions_max_seats_used_migration_2 ON gitlab_subsc ...@@ -29522,6 +29522,8 @@ CREATE INDEX tmp_gitlab_subscriptions_max_seats_used_migration_2 ON gitlab_subsc
CREATE INDEX tmp_idx_vulnerability_occurrences_on_id_where_report_type_7_99 ON vulnerability_occurrences USING btree (id) WHERE (report_type = ANY (ARRAY[7, 99])); CREATE INDEX tmp_idx_vulnerability_occurrences_on_id_where_report_type_7_99 ON vulnerability_occurrences USING btree (id) WHERE (report_type = ANY (ARRAY[7, 99]));
CREATE INDEX tmp_index_ci_job_artifacts_on_id_where_trace_and_expire_at ON ci_job_artifacts USING btree (id) WHERE ((file_type = 3) AND (expire_at = ANY (ARRAY['2021-04-22 00:00:00+00'::timestamp with time zone, '2021-05-22 00:00:00+00'::timestamp with time zone, '2021-06-22 00:00:00+00'::timestamp with time zone, '2022-01-22 00:00:00+00'::timestamp with time zone, '2022-02-22 00:00:00+00'::timestamp with time zone, '2022-03-22 00:00:00+00'::timestamp with time zone, '2022-04-22 00:00:00+00'::timestamp with time zone])));
CREATE INDEX tmp_index_container_repositories_on_id_migration_state ON container_repositories USING btree (id, migration_state); CREATE INDEX tmp_index_container_repositories_on_id_migration_state ON container_repositories USING btree (id, migration_state);
CREATE INDEX tmp_index_for_namespace_id_migration_on_group_members ON members USING btree (id) WHERE ((member_namespace_id IS NULL) AND ((type)::text = 'GroupMember'::text)); CREATE INDEX tmp_index_for_namespace_id_migration_on_group_members ON members USING btree (id) WHERE ((member_namespace_id IS NULL) AND ((type)::text = 'GroupMember'::text));
# frozen_string_literal: true
require 'spec_helper'
# Exercises RemoveAllTraceExpirationDates#perform(1, 99) in the :saas context.
# Each fixture differs from the "should be updated" row in exactly one
# respect, so every negative example checks a single filter.
RSpec.describe Gitlab::BackgroundMigration::RemoveAllTraceExpirationDates, :migration, schema: 20220131000001 do
  subject(:perform) { migration.perform(1, 99) }

  let(:migration) { described_class.new }

  # Trace inside the id range with a targeted expire_at: the only row expected to change.
  let(:trace_in_range) { create_trace!(id: 10, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }
  # Trace inside the id range whose expire_at is not one of the targeted timestamps.
  let(:trace_outside_range) { create_trace!(id: 40, created_at: Date.new(2020, 6, 22), expire_at: Date.new(2021, 1, 22)) }
  # Trace inside the id range with no expire_at at all.
  let(:trace_without_expiry) { create_trace!(id: 30, created_at: Date.new(2020, 6, 21), expire_at: nil) }
  # Non-trace artifact (file_type 1) inside the id range with a targeted expire_at.
  let(:archive_in_range) { create_archive!(id: 10, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }
  # Trace with a targeted expire_at whose id (100) lies outside perform's 1..99 range.
  let(:trace_outside_id_range) { create_trace!(id: 100, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }

  before do
    table(:namespaces).create!(id: 1, name: 'the-namespace', path: 'the-path')
    table(:projects).create!(id: 1, name: 'the-project', namespace_id: 1)
    table(:ci_builds).create!(id: 1, allow_failure: false)
  end

  context 'on gitlab.com', :saas do
    it 'sets expire_at for artifacts in range to nil' do
      expect { perform }.to change { trace_in_range.reload.expire_at }.from(trace_in_range.expire_at).to(nil)
    end

    it 'does not change expire_at timestamps that are not one of the targeted timestamps' do
      expect { perform }.not_to change { trace_outside_range.reload.expire_at }
    end

    it 'does not change traces without an expire_at' do
      expect { perform }.not_to change { trace_without_expiry.reload.expire_at }
    end

    it 'does not touch non-trace artifacts' do
      expect { perform }.not_to change { archive_in_range.reload.expire_at }
    end

    it 'does not touch artifacts outside id range' do
      expect { perform }.not_to change { trace_outside_id_range.reload.expire_at }
    end
  end

  private

  # Inserts a ci_job_artifacts row with file_type 3 (trace).
  def create_trace!(**args)
    table(:ci_job_artifacts).create!(**args, project_id: 1, job_id: 1, file_type: 3)
  end

  # Inserts a ci_job_artifacts row with file_type 1 (used here as a non-trace artifact).
  def create_archive!(**args)
    table(:ci_job_artifacts).create!(**args, project_id: 1, job_id: 1, file_type: 1)
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20220131000001_schedule_trace_expiry_removal.rb')
# Verifies that ScheduleTraceExpiryRemoval#up queues delayed background
# migration jobs in the :saas context and queues nothing otherwise.
RSpec.describe ScheduleTraceExpiryRemoval do
  let(:scheduling_migration) { described_class.new }
  let(:background_migration) { described_class::MIGRATION }

  # Attributes of a row the migration should pick up: a trace (file_type 3)
  # whose expire_at is one of the targeted timestamps.
  let(:matching_row_attrs) { { created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22), project_id: 1, file_type: 3 } }

  before do
    Sidekiq::Worker.clear_all
    # Shrink the batch size so ten rows produce five batches.
    stub_const("#{described_class.name}::BATCH_SIZE", 2)

    table(:namespaces).create!(id: 1, name: 'the-namespace', path: 'the-path')
    table(:projects).create!(id: 1, name: 'the-project', namespace_id: 1)

    1.upto(10) do |n|
      table(:ci_builds).create!(id: n, allow_failure: false)
      table(:ci_job_artifacts).create!(id: n, job_id: n, **matching_row_attrs)
    end
  end

  context 'on gitlab.com', :saas do
    describe '#up' do
      # DELAY_INTERVAL is 4.minutes, so consecutive batches are 240 seconds apart.
      it 'schedules batches of the correct size at 4 minute intervals' do
        Sidekiq::Testing.fake! do
          freeze_time do
            migrate!

            expect(background_migration).to be_scheduled_delayed_migration(240.seconds, 1, 2)
            expect(background_migration).to be_scheduled_delayed_migration(480.seconds, 3, 4)
            expect(background_migration).to be_scheduled_delayed_migration(720.seconds, 5, 6)
            expect(background_migration).to be_scheduled_delayed_migration(960.seconds, 7, 8)
            expect(background_migration).to be_scheduled_delayed_migration(1200.seconds, 9, 10)
            expect(BackgroundMigrationWorker.jobs.size).to eq(5)
          end
        end
      end
    end
  end

  context 'on self-hosted instances' do
    describe '#up' do
      it 'does nothing' do
        Sidekiq::Testing.fake! do
          migrate!

          expect(BackgroundMigrationWorker.jobs.size).to eq(0)
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Removing expire_at timestamps that shouldn't have
    # been written to traces on gitlab.com.
    class RemoveAllTraceExpirationDates
      include Gitlab::Database::MigrationHelpers

      # Number of rows updated per UPDATE statement inside #perform.
      BATCH_SIZE = 1_000

      # Stubbed class to connect to the CI database
      # connects_to has to be called in abstract classes.
      class MultiDbAdaptableClass < ActiveRecord::Base
        self.abstract_class = true

        if Gitlab::Database.has_config?(:ci)
          connects_to database: { writing: :ci, reading: :ci }
        end
      end

      # Stubbed class to access the ci_job_artifacts table
      class JobArtifact < MultiDbAdaptableClass
        include EachBatch

        self.table_name = 'ci_job_artifacts'

        # Built with Time.utc so each value is midnight UTC regardless of the
        # application's configured Time.zone (Date#midnight would use that
        # zone before converting to UTC).
        TARGET_TIMESTAMPS = [
          Time.utc(2021, 4, 22),
          Time.utc(2021, 5, 22),
          Time.utc(2021, 6, 22),
          Time.utc(2022, 1, 22),
          Time.utc(2022, 2, 22),
          Time.utc(2022, 3, 22),
          Time.utc(2022, 4, 22)
        ].freeze

        scope :traces, -> { where(file_type: 3) }
        scope :between, -> (start_id, end_id) { where(id: start_id..end_id) }
        scope :in_targeted_timestamps, -> { where(expire_at: TARGET_TIMESTAMPS) }
      end

      # Sets expire_at to NULL on trace artifacts with ids in
      # start_id..end_id whose expire_at is one of TARGET_TIMESTAMPS,
      # updating BATCH_SIZE rows at a time. Returns early without touching
      # any rows unless Gitlab.com? is true.
      #
      # start_id - first ci_job_artifacts id of the range (inclusive)
      # end_id   - last ci_job_artifacts id of the range (inclusive)
      def perform(start_id, end_id)
        return unless Gitlab.com?

        JobArtifact.traces
          .between(start_id, end_id)
          .in_targeted_timestamps
          .each_batch(of: BATCH_SIZE) { |batch| batch.update_all(expire_at: nil) }
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises RemoveAllTraceExpirationDates#perform(1, 99) outside the :saas
# context, where perform is expected to leave every row untouched.
RSpec.describe Gitlab::BackgroundMigration::RemoveAllTraceExpirationDates, :migration, schema: 20220131000001 do
  subject(:perform) { migration.perform(1, 99) }

  let(:migration) { described_class.new }

  # Trace inside the id range with a targeted expire_at. On gitlab.com this
  # row would be updated, so it is the fixture that exercises the
  # Gitlab.com? guard.
  let(:trace_in_range) { create_trace!(id: 10, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }
  # Trace inside the id range whose expire_at is not one of the targeted timestamps.
  let(:trace_outside_range) { create_trace!(id: 40, created_at: Date.new(2020, 6, 22), expire_at: Date.new(2021, 1, 22)) }
  # Trace inside the id range with no expire_at at all.
  let(:trace_without_expiry) { create_trace!(id: 30, created_at: Date.new(2020, 6, 21), expire_at: nil) }
  # Non-trace artifact (file_type 1) inside the id range with a targeted expire_at.
  let(:archive_in_range) { create_archive!(id: 10, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }
  # Trace with a targeted expire_at whose id (100) lies outside perform's 1..99 range.
  let(:trace_outside_id_range) { create_trace!(id: 100, created_at: Date.new(2020, 6, 20), expire_at: Date.new(2022, 1, 22)) }

  before do
    table(:namespaces).create!(id: 1, name: 'the-namespace', path: 'the-path')
    table(:projects).create!(id: 1, name: 'the-project', namespace_id: 1)
    table(:ci_builds).create!(id: 1, allow_failure: false)
  end

  context 'for self-hosted instances' do
    it 'does not change expire_at for traces matching the targeted timestamps' do
      expect { perform }.not_to change { trace_in_range.reload.expire_at }
    end

    it 'does not change expire_at timestamps that are not one of the targeted timestamps' do
      expect { perform }.not_to change { trace_outside_range.reload.expire_at }
    end

    it 'does not change traces without an expire_at' do
      expect { perform }.not_to change { trace_without_expiry.reload.expire_at }
    end

    it 'does not touch non-trace artifacts' do
      expect { perform }.not_to change { archive_in_range.reload.expire_at }
    end

    it 'does not touch artifacts outside id range' do
      expect { perform }.not_to change { trace_outside_id_range.reload.expire_at }
    end
  end

  private

  # Inserts a ci_job_artifacts row with file_type 3 (trace).
  def create_trace!(**args)
    table(:ci_job_artifacts).create!(**args, project_id: 1, job_id: 1, file_type: 3)
  end

  # Inserts a ci_job_artifacts row with file_type 1 (used here as a non-trace artifact).
  def create_archive!(**args)
    table(:ci_job_artifacts).create!(**args, project_id: 1, job_id: 1, file_type: 1)
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment