Commit 1b788c66 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'sh-fix-issue-52649' into 'master'

Fix statement timeouts in RemoveRestrictedTodos migration

Closes #52649

See merge request gitlab-org/gitlab-ce!22795
parents 6da11809 bb55bcdd
...@@ -39,7 +39,15 @@ module EachBatch ...@@ -39,7 +39,15 @@ module EachBatch
# #
# of - The number of rows to retrieve per batch. # of - The number of rows to retrieve per batch.
# column - The column to use for ordering the batches. # column - The column to use for ordering the batches.
def each_batch(of: 1000, column: primary_key) # order_hint - An optional column to append to the `ORDER BY id`
# clause to help the query planner. PostgreSQL might perform badly
# with a LIMIT 1 because the planner is guessing that scanning the
# index in ID order will come across the desired row in less time
# it will take the planner than using another index. The
# order_hint does not affect the search results. For example,
# `ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER
# BY id ASC`.
def each_batch(of: 1000, column: primary_key, order_hint: nil)
unless column unless column
raise ArgumentError, raise ArgumentError,
'the column: argument must be set to a column name to use for ordering rows' 'the column: argument must be set to a column name to use for ordering rows'
...@@ -48,7 +56,9 @@ module EachBatch ...@@ -48,7 +56,9 @@ module EachBatch
start = except(:select) start = except(:select)
.select(column) .select(column)
.reorder(column => :asc) .reorder(column => :asc)
.take
start = start.order(order_hint) if order_hint
start = start.take
return unless start return unless start
...@@ -60,6 +70,9 @@ module EachBatch ...@@ -60,6 +70,9 @@ module EachBatch
.select(column) .select(column)
.where(arel_table[column].gteq(start_id)) .where(arel_table[column].gteq(start_id))
.reorder(column => :asc) .reorder(column => :asc)
stop = stop.order(order_hint) if order_hint
stop = stop
.offset(of) .offset(of)
.limit(1) .limit(1)
.take .take
......
---
title: Fix statement timeouts in RemoveRestrictedTodos migration
merge_request: 22795
author:
type: other
# frozen_string_literal: true
# rescheduling of the revised RemoveRestrictedTodosWithCte background migration
class RemoveRestrictedTodosAgain < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
MIGRATION = 'RemoveRestrictedTodos'.freeze
BATCH_SIZE = 1000
DELAY_INTERVAL = 5.minutes.to_i
class Project < ActiveRecord::Base
include EachBatch
self.table_name = 'projects'
end
def up
Project.where('EXISTS (SELECT 1 FROM todos WHERE todos.project_id = projects.id)')
.each_batch(of: BATCH_SIZE) do |batch, index|
range = batch.pluck('MIN(id)', 'MAX(id)').first
BackgroundMigrationWorker.perform_in(index * DELAY_INTERVAL, MIGRATION, range)
end
end
def down
# nothing to do
end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20181101144347) do ActiveRecord::Schema.define(version: 20181107054254) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
......
...@@ -67,7 +67,7 @@ module Gitlab ...@@ -67,7 +67,7 @@ module Gitlab
.where('access_level >= ?', 20) .where('access_level >= ?', 20)
confidential_issues = Issue.select(:id, :author_id).where(confidential: true, project_id: project_id) confidential_issues = Issue.select(:id, :author_id).where(confidential: true, project_id: project_id)
confidential_issues.each_batch(of: 100) do |batch| confidential_issues.each_batch(of: 100, order_hint: :confidential) do |batch|
batch.each do |issue| batch.each do |issue|
assigned_users = IssueAssignee.select(:user_id).where(issue_id: issue.id) assigned_users = IssueAssignee.select(:user_id).where(issue_id: issue.id)
......
...@@ -14,40 +14,45 @@ describe EachBatch do ...@@ -14,40 +14,45 @@ describe EachBatch do
5.times { create(:user, updated_at: 1.day.ago) } 5.times { create(:user, updated_at: 1.day.ago) }
end end
it 'yields an ActiveRecord::Relation when a block is given' do shared_examples 'each_batch handling' do |kwargs|
model.each_batch do |relation| it 'yields an ActiveRecord::Relation when a block is given' do
expect(relation).to be_a_kind_of(ActiveRecord::Relation) model.each_batch(kwargs) do |relation|
expect(relation).to be_a_kind_of(ActiveRecord::Relation)
end
end end
end
it 'yields a batch index as the second argument' do it 'yields a batch index as the second argument' do
model.each_batch do |_, index| model.each_batch(kwargs) do |_, index|
expect(index).to eq(1) expect(index).to eq(1)
end
end end
end
it 'accepts a custom batch size' do it 'accepts a custom batch size' do
amount = 0 amount = 0
model.each_batch(of: 1) { amount += 1 } model.each_batch(kwargs.merge({ of: 1 })) { amount += 1 }
expect(amount).to eq(5) expect(amount).to eq(5)
end end
it 'does not include ORDER BYs in the yielded relations' do it 'does not include ORDER BYs in the yielded relations' do
model.each_batch do |relation| model.each_batch do |relation|
expect(relation.to_sql).not_to include('ORDER BY') expect(relation.to_sql).not_to include('ORDER BY')
end
end end
end
it 'allows updating of the yielded relations' do it 'allows updating of the yielded relations' do
time = Time.now time = Time.now
model.each_batch do |relation| model.each_batch do |relation|
relation.update_all(updated_at: time) relation.update_all(updated_at: time)
end end
expect(model.where(updated_at: time).count).to eq(5) expect(model.where(updated_at: time).count).to eq(5)
end
end end
it_behaves_like 'each_batch handling', {}
it_behaves_like 'each_batch handling', { order_hint: :updated_at }
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment