Commit 3c419ef1 authored by Alex Ives

Merge branch '273784-issues-full-text-search' into 'master'

Use PG full-text search for searching issues

See merge request gitlab-org/gitlab!71913
parents fa583db1 33253988
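
For context before the diff, a minimal console sketch of the intended flow, assuming the flag is enabled for a project; the project path, user lookup, and search term are illustrative, not part of this change:

# Illustrative only: turn the flag on for one project, then search its issues.
project = Project.find_by_full_path('gitlab-org/gitlab') # example path
user    = User.first                                      # any user with access
Feature.enable(:issues_full_text_search, project)

# IssuableCollections sets attempt_full_text_search when the flag is enabled,
# and IssuesFinder then takes the PG full-text path for ASCII/Latin-only terms.
IssuesFinder.new(user, project_id: project.id, search: 'background migration',
                 attempt_full_text_search: true).execute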
......@@ -117,6 +117,10 @@ module IssuableCollections
options[:attempt_group_search_optimizations] = true
end
if collection_type == 'Issue' && Feature.enabled?(:issues_full_text_search, @project || @group, default_enabled: :yaml)
options[:attempt_full_text_search] = true
end
params.permit(finder_type.valid_params).merge(options)
end
end
......
......@@ -37,6 +37,7 @@
# attempt_project_search_optimizations: boolean
# crm_contact_id: integer
# crm_organization_id: integer
# attempt_full_text_search: boolean
#
class IssuableFinder
prepend FinderWithCrossProjectAccess
......@@ -46,6 +47,7 @@ class IssuableFinder
requires_cross_project_access unless: -> { params.project? }
FULL_TEXT_SEARCH_TERM_REGEX = /\A[\p{ASCII}|\p{Latin}]+\z/.freeze
NEGATABLE_PARAMS_HELPER_KEYS = %i[project_id scope status include_subgroups].freeze
attr_accessor :current_user, :params
......@@ -331,6 +333,8 @@ class IssuableFinder
return items if items.is_a?(ActiveRecord::NullRelation)
return items if Feature.enabled?(:disable_anonymous_search, type: :ops) && current_user.nil?
return items.pg_full_text_search(search) if use_full_text_search?
if use_cte_for_search?
cte = Gitlab::SQL::CTE.new(klass.table_name, items)
......@@ -341,6 +345,10 @@ class IssuableFinder
end
# rubocop: enable CodeReuse/ActiveRecord
def use_full_text_search?
params[:attempt_full_text_search] && params[:search] =~ FULL_TEXT_SEARCH_TERM_REGEX
end
# rubocop: disable CodeReuse/ActiveRecord
def by_iids(items)
params[:iids].present? ? items.where(iid: params[:iids]) : items
......
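
To make the use_full_text_search? guard above concrete, a quick sketch of how the regex classifies terms (the results follow from the regex itself; the sample strings are illustrative):

regex = /\A[\p{ASCII}|\p{Latin}]+\z/ # FULL_TEXT_SEARCH_TERM_REGEX

'fix login bug'.match?(regex)   # => true  (plain ASCII)
'Jürgen'.match?(regex)          # => true  (Latin script, accented)
'日本語 title'.match?(regex)     # => false (non-Latin, falls back to the existing search path)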
# frozen_string_literal: true
# This module adds PG full-text search capabilities to a model.
# A `search_data` association with a `search_vector` column is required.
#
# Declare the fields that will be part of the search vector with their
# corresponding weights. Possible values for weight are A, B, C, or D.
# For example:
#
# include PgFullTextSearchable
# pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
#
# This module sets up an after_commit hook that updates the search data
# when the searchable columns are changed. You will need to implement the
# `#persist_pg_full_text_search_vector` method that does the actual insert or update.
#
# This also adds a `pg_full_text_search` scope so you can do:
#
# Model.pg_full_text_search("some search term")
module PgFullTextSearchable
extend ActiveSupport::Concern
LONG_WORDS_REGEX = %r([A-Za-z0-9+/]{50,}).freeze
TSVECTOR_MAX_LENGTH = 1.megabyte.freeze
TEXT_SEARCH_DICTIONARY = 'english'
def update_search_data!
tsvector_sql_nodes = self.class.pg_full_text_searchable_columns.map do |column, weight|
tsvector_arel_node(column, weight)&.to_sql
end
persist_pg_full_text_search_vector(Arel.sql(tsvector_sql_nodes.compact.join(' || ')))
rescue ActiveRecord::StatementInvalid => e
raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
Gitlab::AppJsonLogger.error(
message: 'Error updating search data: string is too long for tsvector',
class: self.class.name,
model_id: self.id
)
end
private
def persist_pg_full_text_search_vector(search_vector)
raise NotImplementedError
end
def tsvector_arel_node(column, weight)
return if self[column].blank?
column_text = self[column].gsub(LONG_WORDS_REGEX, ' ')
column_text = column_text[0..(TSVECTOR_MAX_LENGTH - 1)]
column_text = ActiveSupport::Inflector.transliterate(column_text)
Arel::Nodes::NamedFunction.new(
'setweight',
[
Arel::Nodes::NamedFunction.new(
'to_tsvector',
[Arel::Nodes.build_quoted(TEXT_SEARCH_DICTIONARY), Arel::Nodes.build_quoted(column_text)]
),
Arel::Nodes.build_quoted(weight)
]
)
end
included do
cattr_reader :pg_full_text_searchable_columns do
{}
end
end
class_methods do
def pg_full_text_searchable(columns:)
raise 'Full text search columns already defined!' if pg_full_text_searchable_columns.present?
columns.each do |column|
pg_full_text_searchable_columns[column[:name]] = column[:weight]
end
# We update this outside the transaction because this could raise an error if the resulting tsvector
# is too long. When that happens, we still persist the create / update but the model will not have a
# search data record. This is fine in most cases because this is a very rare occurrence and only happens
# with strings that are most likely unsearchable anyway.
#
# We also do not want to use a subtransaction here due to: https://gitlab.com/groups/gitlab-org/-/epics/6540
after_save_commit do
next unless pg_full_text_searchable_columns.keys.any? { |f| saved_changes.has_key?(f) }
update_search_data!
end
end
def pg_full_text_search(search_term)
search_data_table = reflect_on_association(:search_data).klass.arel_table
joins(:search_data).where(
Arel::Nodes::InfixOperation.new(
'@@',
search_data_table[:search_vector],
Arel::Nodes::NamedFunction.new(
'websearch_to_tsquery',
[Arel::Nodes.build_quoted(TEXT_SEARCH_DICTIONARY), Arel::Nodes.build_quoted(search_term)]
)
)
)
end
end
end
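
To make the concern's contract concrete, a minimal sketch of an including class; Widget, WidgetSearchData, and their tables are hypothetical, while the real wiring for Issue appears later in this diff:

class Widget < ApplicationRecord
  include PgFullTextSearchable

  belongs_to :project
  has_one :search_data, class_name: 'WidgetSearchData' # required association (hypothetical model)

  pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'body', weight: 'B' }]

  private

  # Required by the concern: persist the computed tsvector wherever the model stores it.
  def persist_pg_full_text_search_vector(search_vector)
    WidgetSearchData.upsert({ widget_id: id, search_vector: search_vector }, unique_by: :widget_id)
  end
end

# Widget.pg_full_text_search('some term') then joins widget_search_data and matches
# search_vector against websearch_to_tsquery('english', 'some term').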
......@@ -24,6 +24,7 @@ class Issue < ApplicationRecord
include Todoable
include FromUnion
include EachBatch
include PgFullTextSearchable
extend ::Gitlab::Utils::Override
......@@ -77,6 +78,7 @@ class Issue < ApplicationRecord
end
end
has_one :search_data, class_name: 'Issues::SearchData'
has_one :issuable_severity
has_one :sentry_issue
has_one :alert_management_alert, class_name: 'AlertManagement::Alert'
......@@ -102,6 +104,8 @@ class Issue < ApplicationRecord
alias_attribute :external_author, :service_desk_reply_to
pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
scope :in_projects, ->(project_ids) { where(project_id: project_ids) }
scope :not_in_projects, ->(project_ids) { where.not(project_id: project_ids) }
......@@ -233,6 +237,11 @@ class Issue < ApplicationRecord
def order_upvotes_asc
reorder(upvotes_count: :asc)
end
override :pg_full_text_search
def pg_full_text_search(search_term)
super.where('issue_search_data.project_id = issues.project_id')
end
end
def next_object_by_relative_position(ignoring: nil, order: :asc)
......@@ -611,6 +620,11 @@ class Issue < ApplicationRecord
private
override :persist_pg_full_text_search_vector
def persist_pg_full_text_search_vector(search_vector)
Issues::SearchData.upsert({ project_id: project_id, issue_id: id, search_vector: search_vector }, unique_by: %i(project_id issue_id))
end
def spammable_attribute_changed?
title_changed? ||
description_changed? ||
......
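
For reference, a hedged sketch of how the Issue wiring above is exercised from a console; the extra project_id predicate in the override mirrors the hash partitioning of issue_search_data (keyed on project_id), which should let PostgreSQL prune partitions when issues are already scoped to a project:

# Illustrative usage; the ID and term are examples.
project = Project.find(1)
project.issues.pg_full_text_search('flaky spec').limit(20)

# Roughly the SQL produced (simplified):
#   SELECT issues.* FROM issues
#   INNER JOIN issue_search_data ON issue_search_data.issue_id = issues.id
#   WHERE issue_search_data.project_id = issues.project_id
#     AND issue_search_data.search_vector @@ websearch_to_tsquery('english', 'flaky spec')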
# frozen_string_literal: true
module Issues
class SearchData < ApplicationRecord
extend SuppressCompositePrimaryKeyWarning
self.table_name = 'issue_search_data'
belongs_to :issue
end
end
---
name: issues_full_text_search
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/71913
rollout_issue_url:
milestone: '14.5'
type: development
group: group::project management
default_enabled: false
# frozen_string_literal: true
class CreateIssueSearchTable < Gitlab::Database::Migration[1.0]
include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers
def up
execute <<~SQL
CREATE TABLE issue_search_data (
project_id bigint NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
issue_id bigint NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
created_at timestamp with time zone DEFAULT NOW() NOT NULL,
updated_at timestamp with time zone DEFAULT NOW() NOT NULL,
search_vector tsvector,
PRIMARY KEY (project_id, issue_id)
) PARTITION BY HASH (project_id)
SQL
# rubocop: disable Migration/AddIndex
add_index :issue_search_data, :issue_id
add_index :issue_search_data, :search_vector, using: :gin, name: 'index_issue_search_data_on_search_vector'
# rubocop: enable Migration/AddIndex
create_hash_partitions :issue_search_data, 64
end
def down
drop_table :issue_search_data
end
end
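
As a hedged aside on the migration above: create_hash_partitions is expected to create 64 hash partitions of issue_search_data. One way to confirm the layout from a console; the partition naming and schema in the expected output are assumptions, not shown in this diff:

# List the child partitions of issue_search_data from the PostgreSQL catalog.
ActiveRecord::Base.connection.select_values(<<~SQL)
  SELECT inhrelid::regclass::text
  FROM pg_inherits
  WHERE inhparent = 'issue_search_data'::regclass
  ORDER BY 1
SQL
# Expected shape: 64 entries such as "gitlab_partitions_static.issue_search_data_00" .. "_63"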
# frozen_string_literal: true
class BackfillIssueSearchData < Gitlab::Database::Migration[1.0]
MIGRATION = 'BackfillIssueSearchData'
def up
queue_batched_background_migration(
MIGRATION,
:issues,
:id,
batch_size: 100_000,
sub_batch_size: 1_000,
job_interval: 5.minutes
)
end
def down
Gitlab::Database::BackgroundMigration::BatchedMigration
.for_configuration(MIGRATION, :issues, :id, [])
.delete_all
end
end
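
A hedged sketch for checking on the queued batched background migration from a console; for_configuration is the same finder the down method uses, while the attributes read here are assumptions about the BatchedMigration model:

migration = Gitlab::Database::BackgroundMigration::BatchedMigration
  .for_configuration('BackfillIssueSearchData', :issues, :id, [])
  .last

migration&.status      # e.g. active / finished, depending on progress (assumed attribute)
migration&.batch_size  # => 100000, as queued above (assumed attribute)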
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
# Backfills the new `issue_search_data` table, which contains
# the tsvector from the issue title and description.
class BackfillIssueSearchData
include Gitlab::Database::DynamicModelHelpers
def perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
define_batchable_model(batch_table, connection: ActiveRecord::Base.connection).where(batch_column => start_id..stop_id).each_batch(of: sub_batch_size) do |sub_batch|
update_search_data(sub_batch)
sleep(pause_ms * 0.001)
rescue ActiveRecord::StatementInvalid => e
raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
update_search_data_individually(sub_batch, pause_ms)
end
end
private
def update_search_data(relation)
relation.klass.connection.execute(
<<~SQL
INSERT INTO issue_search_data (project_id, issue_id, search_vector, created_at, updated_at)
SELECT
project_id,
id,
setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/]{50,}', ' ', 'g'), 1048576)), 'B'),
NOW(),
NOW()
FROM issues
WHERE issues.id IN (#{relation.select(:id).to_sql})
ON CONFLICT DO NOTHING
SQL
)
end
def update_search_data_individually(relation, pause_ms)
relation.pluck(:id).each do |issue_id|
update_search_data(relation.klass.where(id: issue_id))
sleep(pause_ms * 0.001)
rescue ActiveRecord::StatementInvalid => e
raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
logger.error(
message: 'Error updating search data: string is too long for tsvector',
class: relation.klass.name,
model_id: issue_id
)
end
end
def logger
@logger ||= Gitlab::BackgroundMigration::Logger.build
end
end
end
end
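
For reference, a minimal sketch of running one batch of the background migration by hand; the argument order matches perform above, and the ID range and pause are illustrative:

# perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
Gitlab::BackgroundMigration::BackfillIssueSearchData.new.perform(1, 10_000, :issues, :id, 1_000, 100)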
......@@ -274,6 +274,7 @@ issue_emails: :gitlab_main
issue_email_participants: :gitlab_main
issue_links: :gitlab_main
issue_metrics: :gitlab_main
issue_search_data: :gitlab_main
issues: :gitlab_main
issues_prometheus_alert_events: :gitlab_main
issues_self_managed_prometheus_alert_events: :gitlab_main
......
......@@ -144,7 +144,7 @@ module Gitlab
def params_include_filters?
non_filtering_params = %i[
scope state sort group_id include_subgroups
attempt_group_search_optimizations non_archived issue_types
attempt_group_search_optimizations attempt_full_text_search non_archived issue_types
]
finder.params.except(*non_filtering_params).values.any?
......
......@@ -13,7 +13,22 @@ RSpec.describe DashboardController do
end
describe 'GET issues' do
it_behaves_like 'issuables list meta-data', :issue, :issues
context 'when issues_full_text_search is disabled' do
before do
stub_feature_flags(issues_full_text_search: false)
end
it_behaves_like 'issuables list meta-data', :issue, :issues
end
context 'when issues_full_text_search is enabled' do
before do
stub_feature_flags(issues_full_text_search: true)
end
it_behaves_like 'issuables list meta-data', :issue, :issues
end
it_behaves_like 'issuables requiring filter', :issues
end
......
......@@ -72,7 +72,21 @@ RSpec.describe Projects::IssuesController do
project.add_developer(user)
end
it_behaves_like "issuables list meta-data", :issue
context 'when issues_full_text_search is disabled' do
before do
stub_feature_flags(issues_full_text_search: false)
end
it_behaves_like 'issuables list meta-data', :issue
end
context 'when issues_full_text_search is enabled' do
before do
stub_feature_flags(issues_full_text_search: true)
end
it_behaves_like 'issuables list meta-data', :issue
end
it_behaves_like 'set sort order from user preference' do
let(:sorting_param) { 'updated_asc' }
......
......@@ -497,6 +497,8 @@ RSpec.describe 'Filter issues', :js do
end
it 'filters issues by searched text containing special characters' do
stub_feature_flags(issues_full_text_search: false)
issue = create(:issue, project: project, author: user, title: "issue with !@\#{$%^&*()-+")
search = '!@#{$%^&*()-+'
......
......@@ -632,6 +632,29 @@ RSpec.describe IssuesFinder do
end
end
context 'filtering by issue term using full-text search' do
let(:params) { { search: search_term, attempt_full_text_search: true } }
let_it_be(:english) { create(:issue, project: project1, title: 'title', description: 'something english') }
let_it_be(:japanese) { create(:issue, project: project1, title: '日本語 title', description: 'another english description') }
context 'with latin search term' do
let(:search_term) { 'title english' }
it 'returns matching issues' do
expect(issues).to contain_exactly(english, japanese)
end
end
context 'with non-latin search term' do
let(:search_term) { '日本語' }
it 'returns matching issues' do
expect(issues).to contain_exactly(japanese)
end
end
end
context 'filtering by issues iids' do
let(:params) { { iids: [issue3.iid] } }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillIssueSearchData do
let(:namespaces_table) { table(:namespaces) }
let(:projects_table) { table(:projects) }
let(:issue_search_data_table) { table(:issue_search_data) }
let!(:namespace) { namespaces_table.create!(name: 'gitlab-org', path: 'gitlab-org') }
let!(:project) { projects_table.create!(name: 'gitlab', path: 'gitlab-org/gitlab-ce', namespace_id: namespace.id) }
let!(:issues) { Array.new(10) { table(:issues).create!(project_id: project.id, title: 'test title', description: 'test description') } }
let(:migration) { described_class.new }
before do
allow(migration).to receive(:sleep)
end
it 'backfills search data for the specified records' do
# sleeps for every sub-batch
expect(migration).to receive(:sleep).with(0.05).exactly(3).times
migration.perform(issues[0].id, issues[5].id, :issues, :id, 2, 50)
expect(issue_search_data_table.count).to eq(6)
end
it 'skips issues that already have search data' do
old_time = Time.new(2019, 1, 1).in_time_zone
issue_search_data_table.create!(project_id: project.id, issue_id: issues[0].id, updated_at: old_time)
migration.perform(issues[0].id, issues[5].id, :issues, :id, 2, 50)
expect(issue_search_data_table.count).to eq(6)
expect(issue_search_data_table.find_by_issue_id(issues[0].id).updated_at).to be_like_time(old_time)
end
it 'rescues batch with bad data and inserts other rows' do
issues[1].update!(description: Array.new(30_000) { SecureRandom.hex }.join(' '))
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
expect(logger).to receive(:error).with(a_hash_including(message: /string is too long for tsvector/, model_id: issues[1].id))
end
expect { migration.perform(issues[0].id, issues[5].id, :issues, :id, 2, 50) }.not_to raise_error
expect(issue_search_data_table.count).to eq(5)
expect(issue_search_data_table.find_by_issue_id(issues[1].id)).to eq(nil)
end
it 're-raises other errors' do
allow(migration).to receive(:update_search_data).and_raise(ActiveRecord::StatementTimeout)
expect { migration.perform(issues[0].id, issues[5].id, :issues, :id, 2, 50) }.to raise_error(ActiveRecord::StatementTimeout)
end
end
......@@ -34,6 +34,7 @@ issues:
- issuable_severity
- issuable_sla
- issue_assignees
- search_data
- closed_by
- epic_issue
- epic
......@@ -627,6 +628,8 @@ issuable_severity:
issue_assignees:
- issue
- assignee
search_data:
- issue
merge_request_assignees:
- merge_request
- assignee
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe PgFullTextSearchable do
let(:project) { create(:project) }
let(:model_class) do
Class.new(ActiveRecord::Base) do
include PgFullTextSearchable
self.table_name = 'issues'
belongs_to :project
has_one :search_data, class_name: 'Issues::SearchData'
def persist_pg_full_text_search_vector(search_vector)
Issues::SearchData.upsert({ project_id: project_id, issue_id: id, search_vector: search_vector }, unique_by: %i(project_id issue_id))
end
def self.name
'Issue'
end
end
end
describe '.pg_full_text_searchable' do
it 'sets pg_full_text_searchable_columns' do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]
expect(model_class.pg_full_text_searchable_columns).to eq({ 'title' => 'A' })
end
it 'raises an error when called twice' do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]
expect { model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }] }.to raise_error('Full text search columns already defined!')
end
end
describe 'after commit hook' do
let(:model) { model_class.create!(project: project) }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]
end
context 'when specified columns are changed' do
it 'calls update_search_data!' do
expect(model).to receive(:update_search_data!)
model.update!(title: 'A new title')
end
end
context 'when specified columns are not changed' do
it 'does not enqueue worker' do
expect(model).not_to receive(:update_search_data!)
model.update!(description: 'A new description')
end
end
end
describe '.pg_full_text_search' do
let(:english) { model_class.create!(project: project, title: 'title', description: 'something english') }
let(:with_accent) { model_class.create!(project: project, title: 'Jürgen', description: 'Ærøskøbing') }
let(:japanese) { model_class.create!(project: project, title: '日本語 title', description: 'another english description') }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
[english, with_accent, japanese].each(&:update_search_data!)
end
it 'searches across all fields' do
expect(model_class.pg_full_text_search('title english')).to contain_exactly(english, japanese)
end
it 'searches for exact term with quotes' do
expect(model_class.pg_full_text_search('"something english"')).to contain_exactly(english)
end
it 'ignores accents' do
expect(model_class.pg_full_text_search('jurgen')).to contain_exactly(with_accent)
end
it 'does not support searching by non-Latin characters' do
expect(model_class.pg_full_text_search('日本')).to be_empty
end
end
describe '#update_search_data!' do
let(:model) { model_class.create!(project: project, title: 'title', description: 'description') }
before do
model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
end
it 'sets the correct weights' do
model.update_search_data!
expect(model.search_data.search_vector).to match(/'titl':1A/)
expect(model.search_data.search_vector).to match(/'descript':2B/)
end
context 'with accented and non-Latin characters' do
let(:model) { model_class.create!(project: project, title: '日本語', description: 'Jürgen') }
it 'transliterates accented characters and removes non-Latin ones' do
model.update_search_data!
expect(model.search_data.search_vector).not_to match(/日本語/)
expect(model.search_data.search_vector).to match(/jurgen/)
end
end
context 'when upsert times out' do
it 're-raises the exception' do
expect(Issues::SearchData).to receive(:upsert).once.and_raise(ActiveRecord::StatementTimeout)
expect { model.update_search_data! }.to raise_error(ActiveRecord::StatementTimeout)
end
end
context 'with strings that go over tsvector limit', :delete do
let(:long_string) { Array.new(30_000) { SecureRandom.hex }.join(' ') }
let(:model) { model_class.create!(project: project, title: 'title', description: long_string) }
it 'does not raise an exception' do
expect(Gitlab::AppJsonLogger).to receive(:error).with(
a_hash_including(class: model_class.name, model_id: model.id)
)
expect { model.update_search_data! }.not_to raise_error
expect(model.search_data).to eq(nil)
end
end
context 'when model class does not implement persist_pg_full_text_search_vector' do
let(:model_class) do
Class.new(ActiveRecord::Base) do
include PgFullTextSearchable
self.table_name = 'issues'
belongs_to :project
has_one :search_data, class_name: 'Issues::SearchData'
def self.name
'Issue'
end
end
end
it 'raises an error' do
expect { model.update_search_data! }.to raise_error(NotImplementedError)
end
end
end
end