Commit 0d346291 authored by Igor Drozdov's avatar Igor Drozdov

Add post migration to cleanup orphaned records

The migration cleans up lfs_objects_projects orphaned records

The number of lfs_objects_projects on SaSS: 73747276
Batch size: 10000
Interval: 2 minutes

The job is going to execute: ~10 days

Changelog: added
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/61787
parent 17896b3b
# frozen_string_literal: true
class ScheduleCleanupOrphanedLfsObjectsProjects < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION = 'CleanupOrphanedLfsObjectsProjects'
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 50_000
disable_ddl_transaction!
class LfsObjectsProject < ActiveRecord::Base
self.table_name = 'lfs_objects_projects'
include ::EachBatch
end
def up
queue_background_migration_jobs_by_range_at_intervals(LfsObjectsProject, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
# NOOP
end
end
7e52f9ba8470fd8c2e149fea723c9b06b92ecde2dac4db4512534b3e23952c61
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# The migration is used to cleanup orphaned lfs_objects_projects in order to
# introduce valid foreign keys to this table
class CleanupOrphanedLfsObjectsProjects
# A model to access lfs_objects_projects table in migrations
class LfsObjectsProject < ActiveRecord::Base
self.table_name = 'lfs_objects_projects'
include ::EachBatch
belongs_to :lfs_object
belongs_to :project
end
# A model to access lfs_objects table in migrations
class LfsObject < ActiveRecord::Base
self.table_name = 'lfs_objects'
end
# A model to access projects table in migrations
class Project < ActiveRecord::Base
self.table_name = 'projects'
end
SUB_BATCH_SIZE = 5000
CLEAR_CACHE_DELAY = 1.minute
def perform(start_id, end_id)
cleanup_lfs_objects_projects_without_lfs_object(start_id, end_id)
cleanup_lfs_objects_projects_without_project(start_id, end_id)
end
private
def cleanup_lfs_objects_projects_without_lfs_object(start_id, end_id)
each_record_without_association(start_id, end_id, :lfs_object, :lfs_objects) do |lfs_objects_projects_without_lfs_objects|
projects = Project.where(id: lfs_objects_projects_without_lfs_objects.select(:project_id))
if projects.present?
ProjectCacheWorker.bulk_perform_in_with_contexts(
CLEAR_CACHE_DELAY,
projects,
arguments_proc: ->(project) { [project.id, [], [:lfs_objects_size]] },
context_proc: ->(project) { { project: project } }
)
end
lfs_objects_projects_without_lfs_objects.delete_all
end
end
def cleanup_lfs_objects_projects_without_project(start_id, end_id)
each_record_without_association(start_id, end_id, :project, :projects) do |lfs_objects_projects_without_projects|
lfs_objects_projects_without_projects.delete_all
end
end
def each_record_without_association(start_id, end_id, association, table_name)
batch = LfsObjectsProject.where(id: start_id..end_id)
batch.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
first, last = sub_batch.pluck(Arel.sql('min(lfs_objects_projects.id), max(lfs_objects_projects.id)')).first
lfs_objects_without_association =
LfsObjectsProject
.unscoped
.left_outer_joins(association)
.where(id: (first..last), table_name => { id: nil })
yield lfs_objects_without_association
end
end
end
end
end
......@@ -54,7 +54,6 @@ RSpec.describe 'Database schema' do
keys: %w[user_id],
label_links: %w[target_id],
ldap_group_links: %w[group_id],
lfs_objects_projects: %w[lfs_object_id project_id],
members: %w[source_id created_by_id],
merge_requests: %w[last_edited_by_id state_id],
namespaces: %w[owner_id parent_id],
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::CleanupOrphanedLfsObjectsProjects, schema: 20210514063252 do
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:lfs_objects) { table(:lfs_objects) }
let(:projects) { table(:projects) }
let(:namespaces) { table(:namespaces) }
let(:namespace) { namespaces.create!(name: 'namespace', path: 'namespace') }
let(:project) { projects.create!(namespace_id: namespace.id) }
let(:another_project) { projects.create!(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create!(oid: 'abcdef', size: 1) }
let(:another_lfs_object) { lfs_objects.create!(oid: '1abcde', size: 2) }
let!(:without_object1) { create_object(project_id: project.id) }
let!(:without_object2) { create_object(project_id: another_project.id) }
let!(:without_object3) { create_object(project_id: another_project.id) }
let!(:with_project_and_object1) { create_object(project_id: project.id, lfs_object_id: lfs_object.id) }
let!(:with_project_and_object2) { create_object(project_id: project.id, lfs_object_id: another_lfs_object.id) }
let!(:with_project_and_object3) { create_object(project_id: another_project.id, lfs_object_id: another_lfs_object.id) }
let!(:without_project1) { create_object(lfs_object_id: lfs_object.id) }
let!(:without_project2) { create_object(lfs_object_id: another_lfs_object.id) }
let!(:without_project_and_object) { create_object }
def create_object(project_id: non_existing_record_id, lfs_object_id: non_existing_record_id)
lfs_objects_project = nil
ActiveRecord::Base.connection.disable_referential_integrity do
lfs_objects_project = lfs_objects_projects.create!(project_id: project_id, lfs_object_id: lfs_object_id)
end
lfs_objects_project
end
subject { described_class.new }
describe '#perform' do
it 'lfs_objects_projects without an existing lfs object or project are removed' do
subject.perform(without_object1.id, without_object3.id)
expect(lfs_objects_projects.all).to match_array([
with_project_and_object1, with_project_and_object2, with_project_and_object3,
without_project1, without_project2, without_project_and_object
])
subject.perform(with_project_and_object1.id, with_project_and_object3.id)
expect(lfs_objects_projects.all).to match_array([
with_project_and_object1, with_project_and_object2, with_project_and_object3,
without_project1, without_project2, without_project_and_object
])
subject.perform(without_project1.id, without_project_and_object.id)
expect(lfs_objects_projects.all).to match_array([
with_project_and_object1, with_project_and_object2, with_project_and_object3
])
expect(lfs_objects.ids).to contain_exactly(lfs_object.id, another_lfs_object.id)
expect(projects.ids).to contain_exactly(project.id, another_project.id)
end
it 'cache for affected projects is being reset' do
expect(ProjectCacheWorker).to receive(:bulk_perform_in) do |delay, args|
expect(delay).to eq(1.minute)
expect(args).to match_array([[project.id, [], [:lfs_objects_size]], [another_project.id, [], [:lfs_objects_size]]])
end
subject.perform(without_object1.id, with_project_and_object1.id)
expect(ProjectCacheWorker).not_to receive(:bulk_perform_in)
subject.perform(with_project_and_object1.id, with_project_and_object3.id)
expect(ProjectCacheWorker).not_to receive(:bulk_perform_in)
subject.perform(without_project1.id, without_project_and_object.id)
end
end
end
......@@ -53,7 +53,7 @@ RSpec.describe Gitlab::Cleanup::OrphanLfsFileReferences do
expect(null_logger).to receive(:info).with(/Looking for orphan LFS files/)
expect(null_logger).to receive(:info).with(/Nothing to do/)
project.lfs_objects_projects.delete_all
LfsObjectsProject.where(project: project).delete_all
expect(service).not_to receive(:remove_orphan_references)
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!('schedule_cleanup_orphaned_lfs_objects_projects')
RSpec.describe ScheduleCleanupOrphanedLfsObjectsProjects, schema: 20210511165250 do
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:projects) { table(:projects) }
let(:namespaces) { table(:namespaces) }
let(:lfs_objects) { table(:lfs_objects) }
let(:namespace) { namespaces.create!(name: 'namespace', path: 'namespace') }
let(:project) { projects.create!(namespace_id: namespace.id) }
let(:another_project) { projects.create!(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create!(oid: 'abcdef', size: 1) }
let(:another_lfs_object) { lfs_objects.create!(oid: '1abcde', size: 2) }
describe '#up' do
it 'schedules CleanupOrphanedLfsObjectsProjects background jobs' do
stub_const("#{described_class}::BATCH_SIZE", 2)
lfs_objects_project1 = lfs_objects_projects.create!(project_id: project.id, lfs_object_id: lfs_object.id)
lfs_objects_project2 = lfs_objects_projects.create!(project_id: another_project.id, lfs_object_id: lfs_object.id)
lfs_objects_project3 = lfs_objects_projects.create!(project_id: project.id, lfs_object_id: another_lfs_object.id)
lfs_objects_project4 = lfs_objects_projects.create!(project_id: another_project.id, lfs_object_id: another_lfs_object.id)
freeze_time do
migrate!
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, lfs_objects_project1.id, lfs_objects_project2.id)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, lfs_objects_project3.id, lfs_objects_project4.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
......@@ -6456,6 +6456,10 @@ RSpec.describe Project, factory_default: :keep do
it 'lfs_objects_projects associations are deleted along with project' do
expect { project.delete }.to change(LfsObjectsProject, :count).by(-2)
end
it 'lfs_objects associations are unchanged when the assicated project is removed' do
expect { project.delete }.not_to change(LfsObject, :count)
end
end
context 'when project has no associated LFS objects' do
......
......@@ -8,13 +8,14 @@ RSpec.describe Lfs::PushService do
let_it_be(:project) { create(:forked_project_with_submodules) }
let_it_be(:remote_mirror) { create(:remote_mirror, project: project, enabled: true) }
let_it_be(:lfs_object) { create_linked_lfs_object(project, :project) }
let(:params) { { url: remote_mirror.bare_url, credentials: remote_mirror.credentials } }
subject(:service) { described_class.new(project, nil, params) }
describe "#execute" do
let_it_be(:lfs_object) { create_linked_lfs_object(project, :project) }
it 'uploads the object when upload is requested' do
stub_lfs_batch(lfs_object)
......@@ -25,14 +26,6 @@ RSpec.describe Lfs::PushService do
expect(service.execute).to eq(status: :success)
end
it 'does nothing if there are no LFS objects' do
lfs_object.destroy!
expect(lfs_client).not_to receive(:upload!)
expect(service.execute).to eq(status: :success)
end
it 'does not upload the object when upload is not requested' do
stub_lfs_batch(lfs_object, upload: false)
......@@ -88,6 +81,12 @@ RSpec.describe Lfs::PushService do
end
end
it 'does nothing if there are no LFS objects' do
expect(lfs_client).not_to receive(:upload!)
expect(service.execute).to eq(status: :success)
end
def create_linked_lfs_object(project, type)
create(:lfs_objects_project, project: project, repository_type: type).lfs_object
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment