Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
33253988
Commit
33253988
authored
Nov 12, 2021
by
Heinrich Lee Yu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Partition issue search data table
parent
5dc6368c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1651 additions
and
161 deletions
+1651
-161
app/models/concerns/pg_full_text_searchable.rb
app/models/concerns/pg_full_text_searchable.rb
+13
-3
app/models/issue.rb
app/models/issue.rb
+10
-0
app/models/issues/search_data.rb
app/models/issues/search_data.rb
+2
-0
db/migrate/20211007090229_create_issue_search_table.rb
db/migrate/20211007090229_create_issue_search_table.rb
+24
-8
db/structure.sql
db/structure.sql
+1553
-138
lib/gitlab/background_migration/backfill_issue_search_data.rb
...gitlab/background_migration/backfill_issue_search_data.rb
+2
-1
spec/lib/gitlab/background_migration/backfill_issue_search_data_spec.rb
...b/background_migration/backfill_issue_search_data_spec.rb
+12
-4
spec/models/concerns/pg_full_text_searchable_spec.rb
spec/models/concerns/pg_full_text_searchable_spec.rb
+35
-7
No files found.
app/models/concerns/pg_full_text_searchable.rb
View file @
33253988
...
...
@@ -11,7 +11,8 @@
# pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
#
# This module sets up an after_commit hook that updates the search data
# when the searchable columns are changed.
# when the searchable columns are changed. You will need to implement the
# `#persist_pg_full_text_search_vector` method that does the actual insert or update.
#
# This also adds a `pg_full_text_search` scope so you can do:
#
...
...
@@ -29,8 +30,7 @@ module PgFullTextSearchable
tsvector_arel_node
(
column
,
weight
)
&
.
to_sql
end
association
=
self
.
class
.
reflect_on_association
(
:search_data
)
association
.
klass
.
upsert
({
association
.
foreign_key
=>
id
,
search_vector:
Arel
.
sql
(
tsvector_sql_nodes
.
compact
.
join
(
' || '
))
})
persist_pg_full_text_search_vector
(
Arel
.
sql
(
tsvector_sql_nodes
.
compact
.
join
(
' || '
)))
rescue
ActiveRecord
::
StatementInvalid
=>
e
raise
unless
e
.
cause
.
is_a?
(
PG
::
ProgramLimitExceeded
)
&&
e
.
message
.
include?
(
'string is too long for tsvector'
)
...
...
@@ -43,6 +43,10 @@ module PgFullTextSearchable
private
def
persist_pg_full_text_search_vector
(
search_vector
)
raise
NotImplementedError
end
def
tsvector_arel_node
(
column
,
weight
)
return
if
self
[
column
].
blank?
...
...
@@ -76,6 +80,12 @@ module PgFullTextSearchable
pg_full_text_searchable_columns
[
column
[
:name
]]
=
column
[
:weight
]
end
# We update this outside the transaction because this could raise an error if the resulting tsvector
# is too long. When that happens, we still persist the create / update but the model will not have a
# search data record. This is fine in most cases because this is a very rare occurrence and only happens
# with strings that are most likely unsearchable anyway.
#
# We also do not want to use a subtransaction here due to: https://gitlab.com/groups/gitlab-org/-/epics/6540
after_save_commit
do
next
unless
pg_full_text_searchable_columns
.
keys
.
any?
{
|
f
|
saved_changes
.
has_key?
(
f
)
}
...
...
app/models/issue.rb
View file @
33253988
...
...
@@ -237,6 +237,11 @@ class Issue < ApplicationRecord
def
order_upvotes_asc
reorder
(
upvotes_count: :asc
)
end
override
:pg_full_text_search
def
pg_full_text_search
(
search_term
)
super
.
where
(
'issue_search_data.project_id = issues.project_id'
)
end
end
def
next_object_by_relative_position
(
ignoring:
nil
,
order: :asc
)
...
...
@@ -615,6 +620,11 @@ class Issue < ApplicationRecord
private
override
:persist_pg_full_text_search_vector
def
persist_pg_full_text_search_vector
(
search_vector
)
Issues
::
SearchData
.
upsert
({
project_id:
project_id
,
issue_id:
id
,
search_vector:
search_vector
},
unique_by:
%i(project_id issue_id)
)
end
def
spammable_attribute_changed?
title_changed?
||
description_changed?
||
...
...
app/models/issues/search_data.rb
View file @
33253988
...
...
@@ -2,6 +2,8 @@
module
Issues
class
SearchData
<
ApplicationRecord
extend
SuppressCompositePrimaryKeyWarning
self
.
table_name
=
'issue_search_data'
belongs_to
:issue
...
...
db/migrate/20211007090229_create_issue_search_table.rb
View file @
33253988
# frozen_string_literal: true
class
CreateIssueSearchTable
<
Gitlab
::
Database
::
Migration
[
1.0
]
def
change
create_table
:issue_search_data
,
id:
false
do
|
t
|
t
.
references
:issue
,
index:
false
,
default:
nil
,
primary_key:
true
,
foreign_key:
{
on_delete: :cascade
},
type: :bigint
t
.
timestamps_with_timezone
default:
->
{
'CURRENT_TIMESTAMP'
}
t
.
tsvector
:search_vector
t
.
index
:search_vector
,
using: :gin
,
name:
'index_issue_search_data_on_search_vector'
end
include
Gitlab
::
Database
::
PartitioningMigrationHelpers
::
TableManagementHelpers
def
up
execute
<<~
SQL
CREATE TABLE issue_search_data (
project_id bigint NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
issue_id bigint NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
created_at timestamp with time zone DEFAULT NOW() NOT NULL,
updated_at timestamp with time zone DEFAULT NOW() NOT NULL,
search_vector tsvector,
PRIMARY KEY (project_id, issue_id)
) PARTITION BY HASH (project_id)
SQL
# rubocop: disable Migration/AddIndex
add_index
:issue_search_data
,
:issue_id
add_index
:issue_search_data
,
:search_vector
,
using: :gin
,
name:
'index_issue_search_data_on_search_vector'
# rubocop: enable Migration/AddIndex
create_hash_partitions
:issue_search_data
,
64
end
def
down
drop_table
:issue_search_data
end
end
db/structure.sql
View file @
33253988
This source diff could not be displayed because it is too large. You can
view the blob
instead.
lib/gitlab/background_migration/backfill_issue_search_data.rb
View file @
33253988
...
...
@@ -25,8 +25,9 @@ module Gitlab
def
update_search_data
(
relation
)
relation
.
klass
.
connection
.
execute
(
<<~
SQL
INSERT INTO issue_search_data (issue_id, search_vector, created_at, updated_at)
INSERT INTO issue_search_data (
project_id,
issue_id, search_vector, created_at, updated_at)
SELECT
project_id,
id,
setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/]{50,}', ' ', 'g'), 1048576)), 'B'),
NOW(),
...
...
spec/lib/gitlab/background_migration/backfill_issue_search_data_spec.rb
View file @
33253988
...
...
@@ -3,12 +3,20 @@
require
'spec_helper'
RSpec
.
describe
Gitlab
::
BackgroundMigration
::
BackfillIssueSearchData
do
let
(
:issues_table
)
{
table
(
:issues
)
}
let
(
:namespaces_table
)
{
table
(
:namespaces
)
}
let
(
:projects_table
)
{
table
(
:projects
)
}
let
(
:issue_search_data_table
)
{
table
(
:issue_search_data
)
}
let!
(
:issues
)
{
Array
.
new
(
10
)
{
issues_table
.
create!
(
title:
'test title'
,
description:
'test description'
)
}
}
let!
(
:namespace
)
{
namespaces_table
.
create!
(
name:
'gitlab-org'
,
path:
'gitlab-org'
)
}
let!
(
:project
)
{
projects_table
.
create!
(
name:
'gitlab'
,
path:
'gitlab-org/gitlab-ce'
,
namespace_id:
namespace
.
id
)
}
let!
(
:issues
)
{
Array
.
new
(
10
)
{
table
(
:issues
).
create!
(
project_id:
project
.
id
,
title:
'test title'
,
description:
'test description'
)
}
}
let
(
:migration
)
{
described_class
.
new
}
before
do
allow
(
migration
).
to
receive
(
:sleep
)
end
it
'backfills search data for the specified records'
do
# sleeps for every sub-batch
expect
(
migration
).
to
receive
(
:sleep
).
with
(
0.05
).
exactly
(
3
).
times
...
...
@@ -20,12 +28,12 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillIssueSearchData do
it
'skips issues that already have search data'
do
old_time
=
Time
.
new
(
2019
,
1
,
1
).
in_time_zone
issue_search_data_table
.
create!
(
issue_id:
issues
[
0
].
id
,
updated_at:
old_time
)
issue_search_data_table
.
create!
(
project_id:
project
.
id
,
issue_id:
issues
[
0
].
id
,
updated_at:
old_time
)
migration
.
perform
(
issues
[
0
].
id
,
issues
[
5
].
id
,
:issues
,
:id
,
2
,
50
)
expect
(
issue_search_data_table
.
count
).
to
eq
(
6
)
expect
(
issue_search_data_table
.
find
(
issues
[
0
].
id
).
updated_at
).
to
be_like_time
(
old_time
)
expect
(
issue_search_data_table
.
find
_by_issue_id
(
issues
[
0
].
id
).
updated_at
).
to
be_like_time
(
old_time
)
end
it
'rescues batch with bad data and inserts other rows'
do
...
...
spec/models/concerns/pg_full_text_searchable_spec.rb
View file @
33253988
...
...
@@ -3,14 +3,21 @@
require
'spec_helper'
RSpec
.
describe
PgFullTextSearchable
do
let
(
:project
)
{
create
(
:project
)
}
let
(
:model_class
)
do
Class
.
new
(
ActiveRecord
::
Base
)
do
include
PgFullTextSearchable
self
.
table_name
=
'issues'
belongs_to
:project
has_one
:search_data
,
class_name:
'Issues::SearchData'
def
persist_pg_full_text_search_vector
(
search_vector
)
Issues
::
SearchData
.
upsert
({
project_id:
project_id
,
issue_id:
id
,
search_vector:
search_vector
},
unique_by:
%i(project_id issue_id)
)
end
def
self
.
name
'Issue'
end
...
...
@@ -32,7 +39,7 @@ RSpec.describe PgFullTextSearchable do
end
describe
'after commit hook'
do
let
(
:model
)
{
model_class
.
create!
}
let
(
:model
)
{
model_class
.
create!
(
project:
project
)
}
before
do
model_class
.
pg_full_text_searchable
columns:
[{
name:
'title'
,
weight:
'A'
}]
...
...
@@ -56,9 +63,9 @@ RSpec.describe PgFullTextSearchable do
end
describe
'.pg_full_text_search'
do
let
(
:english
)
{
model_class
.
create!
(
title:
'title'
,
description:
'something english'
)
}
let
(
:with_accent
)
{
model_class
.
create!
(
title:
'Jürgen'
,
description:
'Ærøskøbing'
)
}
let
(
:japanese
)
{
model_class
.
create!
(
title:
'日本語 title'
,
description:
'another english description'
)
}
let
(
:english
)
{
model_class
.
create!
(
project:
project
,
title:
'title'
,
description:
'something english'
)
}
let
(
:with_accent
)
{
model_class
.
create!
(
project:
project
,
title:
'Jürgen'
,
description:
'Ærøskøbing'
)
}
let
(
:japanese
)
{
model_class
.
create!
(
project:
project
,
title:
'日本語 title'
,
description:
'another english description'
)
}
before
do
model_class
.
pg_full_text_searchable
columns:
[{
name:
'title'
,
weight:
'A'
},
{
name:
'description'
,
weight:
'B'
}]
...
...
@@ -84,7 +91,7 @@ RSpec.describe PgFullTextSearchable do
end
describe
'#update_search_data!'
do
let
(
:model
)
{
model_class
.
create!
(
title:
'title'
,
description:
'description'
)
}
let
(
:model
)
{
model_class
.
create!
(
project:
project
,
title:
'title'
,
description:
'description'
)
}
before
do
model_class
.
pg_full_text_searchable
columns:
[{
name:
'title'
,
weight:
'A'
},
{
name:
'description'
,
weight:
'B'
}]
...
...
@@ -98,7 +105,7 @@ RSpec.describe PgFullTextSearchable do
end
context
'with accented and non-Latin characters'
do
let
(
:model
)
{
model_class
.
create!
(
title:
'日本語'
,
description:
'Jürgen'
)
}
let
(
:model
)
{
model_class
.
create!
(
project:
project
,
title:
'日本語'
,
description:
'Jürgen'
)
}
it
'transliterates accented characters and removes non-Latin ones'
do
model
.
update_search_data!
...
...
@@ -118,7 +125,7 @@ RSpec.describe PgFullTextSearchable do
context
'with strings that go over tsvector limit'
,
:delete
do
let
(
:long_string
)
{
Array
.
new
(
30_000
)
{
SecureRandom
.
hex
}.
join
(
' '
)
}
let
(
:model
)
{
model_class
.
create!
(
title:
'title'
,
description:
long_string
)
}
let
(
:model
)
{
model_class
.
create!
(
project:
project
,
title:
'title'
,
description:
long_string
)
}
it
'does not raise an exception'
do
expect
(
Gitlab
::
AppJsonLogger
).
to
receive
(
:error
).
with
(
...
...
@@ -130,5 +137,26 @@ RSpec.describe PgFullTextSearchable do
expect
(
model
.
search_data
).
to
eq
(
nil
)
end
end
context
'when model class does not implement persist_pg_full_text_search_vector'
do
let
(
:model_class
)
do
Class
.
new
(
ActiveRecord
::
Base
)
do
include
PgFullTextSearchable
self
.
table_name
=
'issues'
belongs_to
:project
has_one
:search_data
,
class_name:
'Issues::SearchData'
def
self
.
name
'Issue'
end
end
end
it
'raises an error'
do
expect
{
model
.
update_search_data!
}.
to
raise_error
(
NotImplementedError
)
end
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment