Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
5dc6368c
Commit
5dc6368c
authored
Oct 26, 2021
by
Heinrich Lee Yu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add migration to backfill search data
Runs a batched migration job to backfill issue search data
parent
cd8596fe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
134 additions
and
0 deletions
+134
-0
db/post_migrate/20211026070408_backfill_issue_search_data.rb
db/post_migrate/20211026070408_backfill_issue_search_data.rb
+22
-0
db/schema_migrations/20211026070408
db/schema_migrations/20211026070408
+1
-0
lib/gitlab/background_migration/backfill_issue_search_data.rb
...gitlab/background_migration/backfill_issue_search_data.rb
+62
-0
spec/lib/gitlab/background_migration/backfill_issue_search_data_spec.rb
...b/background_migration/backfill_issue_search_data_spec.rb
+49
-0
No files found.
db/post_migrate/20211026070408_backfill_issue_search_data.rb
0 → 100644
View file @
5dc6368c
# frozen_string_literal: true
# Post-deployment migration that enqueues (and, on rollback, removes) the
# batched background migration named by MIGRATION for the `issues` table.
class BackfillIssueSearchData < Gitlab::Database::Migration[1.0]
  # Name of the background migration job class to run.
  MIGRATION = 'BackfillIssueSearchData'

  # Queues the batched background migration, batching over `issues.id`.
  def up
    queue_batched_background_migration(
      MIGRATION,
      :issues,
      :id,
      batch_size: 100_000,
      sub_batch_size: 1_000,
      job_interval: 5.minutes
    )
  end

  # Deletes the batched migration record(s) matching the configuration
  # used in `up` (same job class, table and column, no extra job arguments).
  def down
    batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration

    batched_migrations
      .for_configuration(MIGRATION, :issues, :id, [])
      .delete_all
  end
end
db/schema_migrations/20211026070408
0 → 100644
View file @
5dc6368c
630899d5a7f833ce0533ae553de89e70bd03fad9b438fd367e3a568261b08b00
\ No newline at end of file
lib/gitlab/background_migration/backfill_issue_search_data.rb
0 → 100644
View file @
5dc6368c
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
  module BackgroundMigration
    # Backfills the new `issue_search_data` table, which contains
    # the tsvector from the issue title and description.
    class BackfillIssueSearchData
      include Gitlab::Database::DynamicModelHelpers

      # Processes one batch of rows, sub-batch by sub-batch.
      #
      # @param start_id [Integer] lower bound (inclusive) of `batch_column`
      # @param stop_id [Integer] upper bound (inclusive) of `batch_column`
      # @param batch_table [Symbol, String] table to iterate over
      # @param batch_column [Symbol, String] column used for the range filter
      # @param sub_batch_size [Integer] number of rows handled per INSERT
      # @param pause_ms [Numeric] pause after each sub-batch, in milliseconds
      # @raise [ActiveRecord::StatementInvalid] for any database error other
      #   than PostgreSQL's "string is too long for tsvector"
      def perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
        batch = define_batchable_model(batch_table, connection: ActiveRecord::Base.connection)
          .where(batch_column => start_id..stop_id)

        batch.each_batch(of: sub_batch_size) do |sub_batch|
          update_search_data(sub_batch)

          sleep(pause_ms * 0.001)
        rescue ActiveRecord::StatementInvalid => e
          raise unless tsvector_too_long?(e)

          # One oversized row fails the whole INSERT; retry row by row so the
          # remaining rows of this sub-batch are still backfilled.
          update_search_data_individually(sub_batch, pause_ms)
        end
      end

      private

      # Inserts search data for every issue in `relation`. Rows that already
      # have search data are left untouched (ON CONFLICT DO NOTHING).
      #
      # Both columns are wrapped in COALESCE: `to_tsvector`, `setweight` and
      # the tsvector `||` operator all yield NULL for NULL input, so a NULL
      # description (or title) would otherwise produce a NULL search_vector
      # and drop the other column's terms as well.
      def update_search_data(relation)
        relation.klass.connection.execute(
          <<~SQL
            INSERT INTO issue_search_data (issue_id, search_vector, created_at, updated_at)
            SELECT
              id,
              setweight(to_tsvector('english', LEFT(COALESCE(title, ''), 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(COALESCE(description, ''), '[A-Za-z0-9+/]{50,}', ' ', 'g'), 1048576)), 'B'),
              NOW(),
              NOW()
            FROM issues
            WHERE issues.id IN (#{relation.select(:id).to_sql})
            ON CONFLICT DO NOTHING
          SQL
        )
      end

      # Fallback for a sub-batch that failed as a whole: inserts each issue on
      # its own, logging (and skipping) the ones whose text is too long for a
      # tsvector. Any other database error is re-raised.
      def update_search_data_individually(relation, pause_ms)
        relation.pluck(:id).each do |issue_id|
          update_search_data(relation.klass.where(id: issue_id))

          sleep(pause_ms * 0.001)
        rescue ActiveRecord::StatementInvalid => e
          raise unless tsvector_too_long?(e)

          logger.error(
            message: 'Error updating search data: string is too long for tsvector',
            class: relation.klass.name,
            model_id: issue_id
          )
        end
      end

      # True when `error` wraps PostgreSQL's "string is too long for tsvector"
      # program-limit error, the only failure this job tolerates.
      def tsvector_too_long?(error)
        error.cause.is_a?(PG::ProgramLimitExceeded) &&
          error.message.include?('string is too long for tsvector')
      end

      # Lazily built logger for background migrations.
      def logger
        @logger ||= Gitlab::BackgroundMigration::Logger.build
      end
    end
  end
end
spec/lib/gitlab/background_migration/backfill_issue_search_data_spec.rb
0 → 100644
View file @
5dc6368c
# frozen_string_literal: true
require
'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillIssueSearchData do
  let(:issues_table) { table(:issues) }
  let(:issue_search_data_table) { table(:issue_search_data) }

  let!(:issues) { Array.new(10) { issues_table.create!(title: 'test title', description: 'test description') } }

  let(:migration) { described_class.new }

  # Runs the job over the first six issues, two rows per sub-batch,
  # with a 50ms pause between sub-batches.
  def perform_migration
    first_id = issues[0].id
    last_id = issues[5].id

    migration.perform(first_id, last_id, :issues, :id, 2, 50)
  end

  it 'backfills search data for the specified records' do
    # sleeps for every sub-batch
    expect(migration).to receive(:sleep).with(0.05).exactly(3).times

    perform_migration

    expect(issue_search_data_table.count).to eq(6)
  end

  it 'skips issues that already have search data' do
    old_time = Time.new(2019, 1, 1).in_time_zone
    issue_search_data_table.create!(issue_id: issues[0].id, updated_at: old_time)

    perform_migration

    existing_row = issue_search_data_table.find(issues[0].id)

    expect(issue_search_data_table.count).to eq(6)
    expect(existing_row.updated_at).to be_like_time(old_time)
  end

  it 'rescues batch with bad data and inserts other rows' do
    # 30,000 distinct hex words make the description too long for a tsvector
    oversized_description = Array.new(30_000) { SecureRandom.hex }.join(' ')
    issues[1].update!(description: oversized_description)

    expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
      expect(logger).to receive(:error)
        .with(a_hash_including(message: /string is too long for tsvector/, model_id: issues[1].id))
    end

    expect { perform_migration }.not_to raise_error

    expect(issue_search_data_table.count).to eq(5)
    expect(issue_search_data_table.find_by_issue_id(issues[1].id)).to be_nil
  end

  it 're-raises other errors' do
    allow(migration).to receive(:update_search_data).and_raise(ActiveRecord::StatementTimeout)

    expect { perform_migration }.to raise_error(ActiveRecord::StatementTimeout)
  end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment