Commit 33253988 authored by Heinrich Lee Yu

Partition issue search data table

parent 5dc6368c
......@@ -11,7 +11,8 @@
# pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
#
# This module sets up an after_commit hook that updates the search data
# when the searchable columns are changed.
# when the searchable columns are changed. You will need to implement the
# `#persist_pg_full_text_search_vector` method that does the actual insert or update.
#
# This also adds a `pg_full_text_search` scope so you can do:
#
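# A usage sketch (illustrative, not part of this diff; `Model` stands in for any
# class that includes this concern):
#
#   Model.pg_full_text_search('some search term')
#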
......@@ -29,8 +30,7 @@ module PgFullTextSearchable
tsvector_arel_node(column, weight)&.to_sql
end
association = self.class.reflect_on_association(:search_data)
association.klass.upsert({ association.foreign_key => id, search_vector: Arel.sql(tsvector_sql_nodes.compact.join(' || ')) })
persist_pg_full_text_search_vector(Arel.sql(tsvector_sql_nodes.compact.join(' || ')))
rescue ActiveRecord::StatementInvalid => e
raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
......@@ -43,6 +43,10 @@ module PgFullTextSearchable
private
def persist_pg_full_text_search_vector(search_vector)
raise NotImplementedError
end
def tsvector_arel_node(column, weight)
return if self[column].blank?
......@@ -76,6 +80,12 @@ module PgFullTextSearchable
pg_full_text_searchable_columns[column[:name]] = column[:weight]
end
# We update this outside the transaction because this could raise an error if the resulting tsvector
# is too long. When that happens, we still persist the create / update but the model will not have a
# search data record. This is acceptable in most cases because it is a very rare occurrence and only happens
# with strings that are most likely unsearchable anyway.
#
# We also do not want to use a subtransaction here due to: https://gitlab.com/groups/gitlab-org/-/epics/6540
after_save_commit do
next unless pg_full_text_searchable_columns.keys.any? { |f| saved_changes.has_key?(f) }
......
......@@ -237,6 +237,11 @@ class Issue < ApplicationRecord
def order_upvotes_asc
reorder(upvotes_count: :asc)
end
override :pg_full_text_search
def pg_full_text_search(search_term)
super.where('issue_search_data.project_id = issues.project_id')
end
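# Illustrative usage sketch, not part of this diff (`project` below is a
# hypothetical Project record): filtering issues by project first ties the
# join condition above to a constant project_id, which should let PostgreSQL
# prune the hash-partitioned issue_search_data table to a single partition.
#
#   Issue.where(project_id: project.id).pg_full_text_search('flaky spec')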
end
def next_object_by_relative_position(ignoring: nil, order: :asc)
......@@ -615,6 +620,11 @@ class Issue < ApplicationRecord
private
override :persist_pg_full_text_search_vector
def persist_pg_full_text_search_vector(search_vector)
Issues::SearchData.upsert({ project_id: project_id, issue_id: id, search_vector: search_vector }, unique_by: %i(project_id issue_id))
end
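# Editorial note, not part of this diff: because `unique_by: %i(project_id issue_id)`
# matches the composite primary key of the partitioned table, Rails' `upsert`
# here compiles to roughly
#   INSERT INTO issue_search_data (...) VALUES (...)
#     ON CONFLICT (project_id, issue_id) DO UPDATE SET search_vector = EXCLUDED.search_vector
# so an existing search data row is updated in place rather than raising.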
def spammable_attribute_changed?
title_changed? ||
description_changed? ||
......
......@@ -2,6 +2,8 @@
module Issues
class SearchData < ApplicationRecord
extend SuppressCompositePrimaryKeyWarning
self.table_name = 'issue_search_data'
belongs_to :issue
......
# frozen_string_literal: true
class CreateIssueSearchTable < Gitlab::Database::Migration[1.0]
def change
create_table :issue_search_data, id: false do |t|
t.references :issue, index: false, default: nil, primary_key: true, foreign_key: { on_delete: :cascade }, type: :bigint
t.timestamps_with_timezone default: -> { 'CURRENT_TIMESTAMP' }
t.tsvector :search_vector
t.index :search_vector, using: :gin, name: 'index_issue_search_data_on_search_vector'
end
include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers
def up
execute <<~SQL
CREATE TABLE issue_search_data (
project_id bigint NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
issue_id bigint NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
created_at timestamp with time zone DEFAULT NOW() NOT NULL,
updated_at timestamp with time zone DEFAULT NOW() NOT NULL,
search_vector tsvector,
PRIMARY KEY (project_id, issue_id)
) PARTITION BY HASH (project_id)
SQL
# rubocop: disable Migration/AddIndex
add_index :issue_search_data, :issue_id
add_index :issue_search_data, :search_vector, using: :gin, name: 'index_issue_search_data_on_search_vector'
# rubocop: enable Migration/AddIndex
create_hash_partitions :issue_search_data, 64
end
def down
drop_table :issue_search_data
end
end
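For context, `create_hash_partitions :issue_search_data, 64` is what actually creates the 64 child tables behind the partitioned parent declared above. A rough sketch of the DDL for one partition, assuming the usual GitLab convention of a `gitlab_partitions_static` schema and zero-padded suffixes (the helper controls the exact naming):

execute <<~SQL
  CREATE TABLE gitlab_partitions_static.issue_search_data_00
    PARTITION OF issue_search_data
    FOR VALUES WITH (MODULUS 64, REMAINDER 0)
SQL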
(diff omitted: too large to display)
......@@ -25,8 +25,9 @@ module Gitlab
def update_search_data(relation)
relation.klass.connection.execute(
<<~SQL
INSERT INTO issue_search_data (issue_id, search_vector, created_at, updated_at)
INSERT INTO issue_search_data (project_id, issue_id, search_vector, created_at, updated_at)
SELECT
project_id,
id,
setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/]{50,}', ' ', 'g'), 1048576)), 'B'),
NOW(),
......
......@@ -3,12 +3,20 @@
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillIssueSearchData do
let(:issues_table) { table(:issues) }
let(:namespaces_table) { table(:namespaces) }
let(:projects_table) { table(:projects) }
let(:issue_search_data_table) { table(:issue_search_data) }
let!(:issues) { Array.new(10) { issues_table.create!(title: 'test title', description: 'test description') } }
let!(:namespace) { namespaces_table.create!(name: 'gitlab-org', path: 'gitlab-org') }
let!(:project) { projects_table.create!(name: 'gitlab', path: 'gitlab-org/gitlab-ce', namespace_id: namespace.id) }
let!(:issues) { Array.new(10) { table(:issues).create!(project_id: project.id, title: 'test title', description: 'test description') } }
let(:migration) { described_class.new }
before do
allow(migration).to receive(:sleep)
end
it 'backfills search data for the specified records' do
# sleeps for every sub-batch
expect(migration).to receive(:sleep).with(0.05).exactly(3).times
......@@ -20,12 +28,12 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillIssueSearchData do
it 'skips issues that already have search data' do
old_time = Time.new(2019, 1, 1).in_time_zone
issue_search_data_table.create!(issue_id: issues[0].id, updated_at: old_time)
issue_search_data_table.create!(project_id: project.id, issue_id: issues[0].id, updated_at: old_time)
migration.perform(issues[0].id, issues[5].id, :issues, :id, 2, 50)
expect(issue_search_data_table.count).to eq(6)
expect(issue_search_data_table.find(issues[0].id).updated_at).to be_like_time(old_time)
expect(issue_search_data_table.find_by_issue_id(issues[0].id).updated_at).to be_like_time(old_time)
end
it 'rescues batch with bad data and inserts other rows' do
......
......@@ -3,14 +3,21 @@
require 'spec_helper'
RSpec.describe PgFullTextSearchable do
let(:project) { create(:project) }
let(:model_class) do
Class.new(ActiveRecord::Base) do
include PgFullTextSearchable
self.table_name = 'issues'
belongs_to :project
has_one :search_data, class_name: 'Issues::SearchData'
def persist_pg_full_text_search_vector(search_vector)
Issues::SearchData.upsert({ project_id: project_id, issue_id: id, search_vector: search_vector }, unique_by: %i(project_id issue_id))
end
def self.name
'Issue'
end
......@@ -32,7 +39,7 @@ RSpec.describe PgFullTextSearchable do
end
describe 'after commit hook' do
let(:model) { model_class.create! }
let(:model) { model_class.create!(project: project) }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]
......@@ -56,9 +63,9 @@ RSpec.describe PgFullTextSearchable do
end
describe '.pg_full_text_search' do
let(:english) { model_class.create!(title: 'title', description: 'something english') }
let(:with_accent) { model_class.create!(title: 'Jürgen', description: 'Ærøskøbing') }
let(:japanese) { model_class.create!(title: '日本語 title', description: 'another english description') }
let(:english) { model_class.create!(project: project, title: 'title', description: 'something english') }
let(:with_accent) { model_class.create!(project: project, title: 'Jürgen', description: 'Ærøskøbing') }
let(:japanese) { model_class.create!(project: project, title: '日本語 title', description: 'another english description') }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
......@@ -84,7 +91,7 @@ RSpec.describe PgFullTextSearchable do
end
describe '#update_search_data!' do
let(:model) { model_class.create!(title: 'title', description: 'description') }
let(:model) { model_class.create!(project: project, title: 'title', description: 'description') }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
......@@ -98,7 +105,7 @@ RSpec.describe PgFullTextSearchable do
end
context 'with accented and non-Latin characters' do
let(:model) { model_class.create!(title: '日本語', description: 'Jürgen') }
let(:model) { model_class.create!(project: project, title: '日本語', description: 'Jürgen') }
it 'transliterates accented characters and removes non-Latin ones' do
model.update_search_data!
......@@ -118,7 +125,7 @@ RSpec.describe PgFullTextSearchable do
context 'with strings that go over tsvector limit', :delete do
let(:long_string) { Array.new(30_000) { SecureRandom.hex }.join(' ') }
let(:model) { model_class.create!(title: 'title', description: long_string) }
let(:model) { model_class.create!(project: project, title: 'title', description: long_string) }
it 'does not raise an exception' do
expect(Gitlab::AppJsonLogger).to receive(:error).with(
......@@ -130,5 +137,26 @@ RSpec.describe PgFullTextSearchable do
expect(model.search_data).to eq(nil)
end
end
context 'when model class does not implement persist_pg_full_text_search_vector' do
let(:model_class) do
Class.new(ActiveRecord::Base) do
include PgFullTextSearchable
self.table_name = 'issues'
belongs_to :project
has_one :search_data, class_name: 'Issues::SearchData'
def self.name
'Issue'
end
end
end
it 'raises an error' do
expect { model.update_search_data! }.to raise_error(NotImplementedError)
end
end
end
end