Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
3c22f6f1
Commit
3c22f6f1
authored
Apr 21, 2021
by
Patrick Bair
Committed by
Mayra Cabrera
Apr 21, 2021
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support multiple columns when converting int types
parent
ae89e2d4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
155 additions
and
65 deletions
+155
-65
lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
...d_migration/copy_column_using_background_migration_job.rb
+22
-7
lib/gitlab/database/migration_helpers.rb
lib/gitlab/database/migration_helpers.rb
+35
-30
spec/lib/gitlab/background_migration/copy_column_using_background_migration_job_spec.rb
...ration/copy_column_using_background_migration_job_spec.rb
+20
-0
spec/lib/gitlab/database/migration_helpers_spec.rb
spec/lib/gitlab/database/migration_helpers_spec.rb
+78
-28
No files found.
lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
View file @
3c22f6f1
...
...
@@ -2,8 +2,8 @@
module
Gitlab
module
BackgroundMigration
# Background migration that updates the value of
a
# column
using the value of another column
in the same table.
# Background migration that updates the value of
one or more
# column
s using the value of other columns
in the same table.
#
# - The {start_id, end_id} arguments are at the start so that it can be used
# with `queue_batched_background_migration`
...
...
@@ -25,17 +25,21 @@ module Gitlab
# sub_batch_size - We don't want updates to take more than ~100ms
# This allows us to run multiple smaller batches during
# the minimum 2.minute interval that we can schedule jobs
# copy_from -
The column
containing the data to copy.
# copy_to -
The column to copy the data to
.
# copy_from -
List of columns
containing the data to copy.
# copy_to -
List of columns to copy the data to. Order must match the order in `copy_from`
.
def
perform
(
start_id
,
end_id
,
batch_table
,
batch_column
,
sub_batch_size
,
copy_from
,
copy_to
)
quoted_copy_from
=
connection
.
quote_column_name
(
copy_from
)
quoted_copy_to
=
connection
.
quote_column_name
(
copy_to
)
copy_from
=
Array
.
wrap
(
copy_from
)
copy_to
=
Array
.
wrap
(
copy_to
)
raise
ArgumentError
,
'number of source and destination columns must match'
unless
copy_from
.
count
==
copy_to
.
count
assignment_clauses
=
column_assignment_clauses
(
copy_from
,
copy_to
)
parent_batch_relation
=
relation_scoped_to_range
(
batch_table
,
batch_column
,
start_id
,
end_id
)
parent_batch_relation
.
each_batch
(
column:
batch_column
,
of:
sub_batch_size
)
do
|
sub_batch
|
batch_metrics
.
time_operation
(
:update_all
)
do
sub_batch
.
update_all
(
"
#{
quoted_copy_to
}
=
#{
quoted_copy_from
}
"
)
sub_batch
.
update_all
(
assignment_clauses
)
end
sleep
(
PAUSE_SECONDS
)
...
...
@@ -55,6 +59,17 @@ module Gitlab
def
relation_scoped_to_range
(
source_table
,
source_key_column
,
start_id
,
stop_id
)
define_batchable_model
(
source_table
).
where
(
source_key_column
=>
start_id
..
stop_id
)
end
def
column_assignment_clauses
(
copy_from
,
copy_to
)
assignments
=
copy_from
.
zip
(
copy_to
).
map
do
|
from_column
,
to_column
|
from_column
=
connection
.
quote_column_name
(
from_column
)
to_column
=
connection
.
quote_column_name
(
to_column
)
"
#{
to_column
}
=
#{
from_column
}
"
end
assignments
.
join
(
', '
)
end
end
end
end
lib/gitlab/database/migration_helpers.rb
View file @
3c22f6f1
...
...
@@ -905,7 +905,7 @@ module Gitlab
end
end
# Initializes the conversion of a
n integer column
to bigint
# Initializes the conversion of a
set of integer columns
to bigint
#
# It can be used for converting both a Primary Key and any Foreign Keys
# that may reference it or any other integer column that we may want to
...
...
@@ -923,14 +923,14 @@ module Gitlab
# Note: this helper is intended to be used in a regular (pre-deployment) migration.
#
# This helper is part 1 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column
and database triggers
# 1. initialize_conversion_of_integer_to_bigint to create the new column
s and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
# column
- The name of the column
that we want to convert to bigint.
# column
s - The name, or array of names, of the column(s)
that we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
def
initialize_conversion_of_integer_to_bigint
(
table
,
column
,
primary_key: :id
)
def
initialize_conversion_of_integer_to_bigint
(
table
,
column
s
,
primary_key: :id
)
unless
table_exists?
(
table
)
raise
"Table
#{
table
}
does not exist"
end
...
...
@@ -939,34 +939,40 @@ module Gitlab
raise
"Column
#{
primary_key
}
does not exist on
#{
table
}
"
end
unless
column_exists?
(
table
,
column
)
raise
"Column
#{
column
}
does not exist on
#{
table
}
"
columns
=
Array
.
wrap
(
columns
)
columns
.
each
do
|
column
|
next
if
column_exists?
(
table
,
column
)
raise
ArgumentError
,
"Column
#{
column
}
does not exist on
#{
table
}
"
end
check_trigger_permissions!
(
table
)
old_column
=
column_for
(
table
,
column
)
tmp_column
=
"
#{
column
}
_convert_to_bigint"
conversions
=
columns
.
to_h
{
|
column
|
[
column
,
"
#{
column
}
_convert_to_bigint"
]
}
with_lock_retries
do
if
(
column
.
to_s
==
primary_key
.
to_s
)
||
!
old_column
.
null
# If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
# That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process
add_column
(
table
,
tmp_column
,
:bigint
,
default:
old_column
.
default
||
0
,
null:
false
)
else
add_column
(
table
,
tmp_column
,
:bigint
,
default:
old_column
.
default
)
conversions
.
each
do
|
(
source_column
,
temporary_name
)
|
column
=
column_for
(
table
,
source_column
)
if
(
column
.
name
.
to_s
==
primary_key
.
to_s
)
||
!
column
.
null
# If the column to be converted is either a PK or is defined as NOT NULL,
# set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
# That way, we skip the expensive validation step required to add
# a NOT NULL constraint at the end of the process
add_column
(
table
,
temporary_name
,
:bigint
,
default:
column
.
default
||
0
,
null:
false
)
else
add_column
(
table
,
temporary_name
,
:bigint
,
default:
column
.
default
)
end
end
install_rename_triggers
(
table
,
co
lumn
,
tmp_column
)
install_rename_triggers
(
table
,
co
nversions
.
keys
,
conversions
.
values
)
end
end
# Backfills the new column
used in the conversion of an integer column to bigint
using background migrations.
# Backfills the new column
s used in an integer-to-bigint conversion
using background migrations.
#
# - This helper should be called from a post-deployment migration.
# - In order for this helper to work properly, the new column must be first initialized with
# - In order for this helper to work properly, the new column
s
must be first initialized with
# the `initialize_conversion_of_integer_to_bigint` helper.
# - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
# which allows a more thorough check that all jobs succeeded in the
...
...
@@ -976,12 +982,12 @@ module Gitlab
# deployed (including background job changes) before we begin processing the background migration.
#
# This helper is part 2 of a multi-step migration process:
# 1. initialize_conversion_of_integer_to_bigint to create the new column
and database triggers
# 1. initialize_conversion_of_integer_to_bigint to create the new column
s and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
# column
- The name of the column that
we want to convert to bigint.
# column
s - The name, or an array of names, of the column(s)
we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
# batch_size - The number of rows to schedule in a single background migration
# sub_batch_size - The smaller batches that will be used by each scheduled job
...
...
@@ -1001,7 +1007,7 @@ module Gitlab
# between the scheduled jobs
def
backfill_conversion_of_integer_to_bigint
(
table
,
column
,
column
s
,
primary_key: :id
,
batch_size:
20_000
,
sub_batch_size:
1000
,
...
...
@@ -1016,22 +1022,21 @@ module Gitlab
raise
"Column
#{
primary_key
}
does not exist on
#{
table
}
"
end
unless
column_exists?
(
table
,
column
)
raise
"Column
#{
column
}
does not exist on
#{
table
}
"
end
conversions
=
Array
.
wrap
(
columns
).
to_h
do
|
column
|
raise
ArgumentError
,
"Column
#{
column
}
does not exist on
#{
table
}
"
unless
column_exists?
(
table
,
column
)
tmp_column
=
"
#{
column
}
_convert_to_bigint"
temporary_name
=
"
#{
column
}
_convert_to_bigint"
raise
ArgumentError
,
"Column
#{
temporary_name
}
does not exist on
#{
table
}
"
unless
column_exists?
(
table
,
temporary_name
)
unless
column_exists?
(
table
,
tmp_column
)
raise
'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
[
column
,
temporary_name
]
end
batched_migration
=
queue_batched_background_migration
(
'CopyColumnUsingBackgroundMigrationJob'
,
table
,
primary_key
,
co
lumn
,
tmp_column
,
co
nversions
.
keys
,
conversions
.
values
,
job_interval:
interval
,
batch_size:
batch_size
,
sub_batch_size:
sub_batch_size
)
...
...
spec/lib/gitlab/background_migration/copy_column_using_background_migration_job_spec.rb
View file @
3c22f6f1
...
...
@@ -65,6 +65,26 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
expect
(
test_table
.
where
(
"name_convert_to_text = 'no name'"
).
count
).
to
eq
(
0
)
end
it
'copies multiple columns when given'
do
columns_to_copy_from
=
%w[id fk]
columns_to_copy_to
=
%w[id_convert_to_bigint fk_convert_to_bigint]
subject
.
perform
(
10
,
15
,
table_name
,
'id'
,
sub_batch_size
,
columns_to_copy_from
,
columns_to_copy_to
)
expect
(
test_table
.
where
(
'id = id_convert_to_bigint AND fk = fk_convert_to_bigint'
).
pluck
(
:id
)).
to
contain_exactly
(
11
,
12
,
15
)
expect
(
test_table
.
where
(
id_convert_to_bigint:
0
).
where
(
fk_convert_to_bigint:
0
).
pluck
(
:id
)).
to
contain_exactly
(
19
)
expect
(
test_table
.
all
.
count
).
to
eq
(
4
)
end
it
'raises error when number of source and target columns does not match'
do
columns_to_copy_from
=
%w[id fk]
columns_to_copy_to
=
%w[id_convert_to_bigint]
expect
do
subject
.
perform
(
10
,
15
,
table_name
,
'id'
,
sub_batch_size
,
columns_to_copy_from
,
columns_to_copy_to
)
end
.
to
raise_error
(
ArgumentError
,
'number of source and destination columns must match'
)
end
it
'tracks timings of queries'
do
expect
(
subject
.
batch_metrics
.
timings
).
to
be_empty
...
...
spec/lib/gitlab/database/migration_helpers_spec.rb
View file @
3c22f6f1
...
...
@@ -1730,12 +1730,10 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
end
end
context
'when the column to convert does not exist'
do
let
(
:column
)
{
:foobar
}
context
'when the column to migrate does not exist'
do
it
'raises an error'
do
expect
{
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
)
}
.
to
raise_error
(
"Column
#{
column
}
does not exist on
#{
table
}
"
)
expect
{
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
:this_column_is_not_real
)
}
.
to
raise_error
(
ArgumentError
,
"Column this_column_is_not_real
does not exist on
#{
table
}
"
)
end
end
...
...
@@ -1743,7 +1741,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it
'creates a not-null bigint column and installs triggers'
do
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
tmp_column
,
:bigint
,
default:
0
,
null:
false
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
column
,
tmp_column
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
[
column
],
[
tmp_column
]
)
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
)
end
...
...
@@ -1755,7 +1753,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it
'creates a not-null bigint column and installs triggers'
do
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
tmp_column
,
:bigint
,
default:
0
,
null:
false
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
column
,
tmp_column
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
[
column
],
[
tmp_column
]
)
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
)
end
...
...
@@ -1767,11 +1765,30 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it
'creates a nullable bigint column and installs triggers'
do
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
tmp_column
,
:bigint
,
default:
nil
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
column
,
tmp_column
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
[
column
],
[
tmp_column
]
)
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
)
end
end
context
'when multiple columns are given'
do
it
'creates the correct columns and installs the trigger'
do
columns_to_convert
=
%i[id non_nullable_column nullable_column]
temporary_columns
=
%w[
id_convert_to_bigint
non_nullable_column_convert_to_bigint
nullable_column_convert_to_bigint
]
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
temporary_columns
[
0
],
:bigint
,
default:
0
,
null:
false
)
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
temporary_columns
[
1
],
:bigint
,
default:
0
,
null:
false
)
expect
(
model
).
to
receive
(
:add_column
).
with
(
table
,
temporary_columns
[
2
],
:bigint
,
default:
nil
)
expect
(
model
).
to
receive
(
:install_rename_triggers
).
with
(
table
,
columns_to_convert
,
temporary_columns
)
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
columns_to_convert
)
end
end
end
describe
'#backfill_conversion_of_integer_to_bigint'
do
...
...
@@ -1783,6 +1800,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
model
.
create_table
table
,
id:
false
do
|
t
|
t
.
integer
:id
,
primary_key:
true
t
.
text
:message
,
null:
false
t
.
integer
:other_id
t
.
timestamps
end
...
...
@@ -1808,14 +1826,14 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
it
'raises an error'
do
expect
{
model
.
backfill_conversion_of_integer_to_bigint
(
table
,
column
)
}
.
to
raise_error
(
"Column
#{
column
}
does not exist on
#{
table
}
"
)
.
to
raise_error
(
ArgumentError
,
"Column
#{
column
}
does not exist on
#{
table
}
"
)
end
end
context
'when the temporary column does not exist'
do
it
'raises an error'
do
expect
{
model
.
backfill_conversion_of_integer_to_bigint
(
table
,
column
)
}
.
to
raise_error
(
'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
)
.
to
raise_error
(
ArgumentError
,
"Column
#{
tmp_column
}
does not exist on
#{
table
}
"
)
end
end
...
...
@@ -1829,33 +1847,65 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
let
(
:migration_relation
)
{
Gitlab
::
Database
::
BackgroundMigration
::
BatchedMigration
.
active
}
before
do
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
)
model
.
initialize_conversion_of_integer_to_bigint
(
table
,
column
s
)
model_class
.
create!
(
message:
'hello'
)
model_class
.
create!
(
message:
'so long'
)
end
it
'creates the batched migration tracking recor
d'
do
l
ast_record
=
model_class
.
create!
(
message:
'goodbye'
)
context
'when a single column is being converte
d'
do
l
et
(
:columns
)
{
column
}
expect
do
model
.
backfill_conversion_of_integer_to_bigint
(
table
,
column
,
batch_size:
2
,
sub_batch_size:
1
)
end
.
to
change
{
migration_relation
.
count
}.
by
(
1
)
expect
(
migration_relation
.
last
).
to
have_attributes
(
job_class_name:
'CopyColumnUsingBackgroundMigrationJob'
,
table_name:
table
.
to_s
,
column_name:
column
.
to_s
,
min_value:
1
,
max_value:
last_record
.
id
,
interval:
120
,
batch_size:
2
,
sub_batch_size:
1
,
job_arguments:
[
column
.
to_s
,
"
#{
column
}
_convert_to_bigint"
]
)
it
'creates the batched migration tracking record'
do
last_record
=
model_class
.
create!
(
message:
'goodbye'
)
expect
do
model
.
backfill_conversion_of_integer_to_bigint
(
table
,
column
,
batch_size:
2
,
sub_batch_size:
1
)
end
.
to
change
{
migration_relation
.
count
}.
by
(
1
)
expect
(
migration_relation
.
last
).
to
have_attributes
(
job_class_name:
'CopyColumnUsingBackgroundMigrationJob'
,
table_name:
table
.
to_s
,
column_name:
column
.
to_s
,
min_value:
1
,
max_value:
last_record
.
id
,
interval:
120
,
batch_size:
2
,
sub_batch_size:
1
,
job_arguments:
[[
column
.
to_s
],
[
"
#{
column
}
_convert_to_bigint"
]]
)
end
end
context
'when multiple columns are being converted'
do
let
(
:other_column
)
{
:other_id
}
let
(
:other_tmp_column
)
{
"
#{
other_column
}
_convert_to_bigint"
}
let
(
:columns
)
{
[
column
,
other_column
]
}
it
'creates the batched migration tracking record'
do
last_record
=
model_class
.
create!
(
message:
'goodbye'
,
other_id:
50
)
expect
do
model
.
backfill_conversion_of_integer_to_bigint
(
table
,
columns
,
batch_size:
2
,
sub_batch_size:
1
)
end
.
to
change
{
migration_relation
.
count
}.
by
(
1
)
expect
(
migration_relation
.
last
).
to
have_attributes
(
job_class_name:
'CopyColumnUsingBackgroundMigrationJob'
,
table_name:
table
.
to_s
,
column_name:
column
.
to_s
,
min_value:
1
,
max_value:
last_record
.
id
,
interval:
120
,
batch_size:
2
,
sub_batch_size:
1
,
job_arguments:
[[
column
.
to_s
,
other_column
.
to_s
],
[
tmp_column
,
other_tmp_column
]]
)
end
end
context
'when the migration should be performed inline'
do
let
(
:columns
)
{
column
}
it
'calls the runner to run the entire migration'
do
expect
(
model
).
to
receive
(
:perform_background_migration_inline?
).
and_return
(
true
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment