Commit 2e6e8ea8 authored by Mario de la Ossa

Enable incremental Elasticsearch wiki indexing

This allows us to index only the changes made between the last indexing
operation and the current one. It also allows us to delete removed wiki files
from the index.
parent d4b7f9ab
# frozen_string_literal: true
module Git
  # Hook point for wiki pushes. The class itself does nothing; the
  # EE::Git::WikiPushService module prepended below supplies the behaviour.
  class WikiPushService < ::BaseService
    # Deliberately empty (returns nil). The prepended EE module overrides
    # this method and calls +super+ into it.
    def execute; end
  end
end

Git::WikiPushService.prepend(EE::Git::WikiPushService)
...@@ -30,15 +30,17 @@ class PostReceive ...@@ -30,15 +30,17 @@ class PostReceive
private private
# Resolves the user who triggered the hook.
#
# Returns whatever +post_received.identify+ returns. When that value is
# falsy, a message naming the unresolved identifier is logged first.
def identify_user(post_received)
  identified = post_received.identify

  unless identified
    log("Triggered hook for non-existing user \"#{post_received.identifier}\"")
  end

  identified
end
def process_project_changes(post_received) def process_project_changes(post_received)
changes = [] changes = []
refs = Set.new refs = Set.new
@user = post_received.identify user = identify_user(post_received)
return false unless user
unless @user
log("Triggered hook for non-existing user \"#{post_received.identifier}\"")
return false
end
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index| post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass = service_klass =
...@@ -51,7 +53,7 @@ class PostReceive ...@@ -51,7 +53,7 @@ class PostReceive
if service_klass if service_klass
service_klass.new( service_klass.new(
post_received.project, post_received.project,
@user, user,
oldrev: oldrev, oldrev: oldrev,
newrev: newrev, newrev: newrev,
ref: ref, ref: ref,
...@@ -64,7 +66,7 @@ class PostReceive ...@@ -64,7 +66,7 @@ class PostReceive
refs << ref refs << ref
end end
after_project_changes_hooks(post_received, @user, refs.to_a, changes) after_project_changes_hooks(post_received, user, refs.to_a, changes)
end end
def after_project_changes_hooks(post_received, user, refs, changes) def after_project_changes_hooks(post_received, user, refs, changes)
...@@ -76,6 +78,11 @@ class PostReceive ...@@ -76,6 +78,11 @@ class PostReceive
post_received.project.touch(:last_activity_at, :last_repository_updated_at) post_received.project.touch(:last_activity_at, :last_repository_updated_at)
post_received.project.wiki.repository.expire_statistics_caches post_received.project.wiki.repository.expire_statistics_caches
ProjectCacheWorker.perform_async(post_received.project.id, [], [:wiki_size]) ProjectCacheWorker.perform_async(post_received.project.id, [], [:wiki_size])
user = identify_user(post_received)
return false unless user
::Git::WikiPushService.new(post_received.project, user, changes: post_received.enum_for(:changes_refs)).execute
end end
def log(message) def log(message)
......
# frozen_string_literal: true
# Adds two wiki-specific bookkeeping columns to the index_statuses table:
# last_wiki_commit (binary) and wiki_indexed_at (timestamp with time zone).
class AddWikiColumnsToIndexStatus < ActiveRecord::Migration[5.1]
include Gitlab::Database::MigrationHelpers
# NOTE(review): presumably the GitLab convention flag declaring that this
# migration can run without downtime — confirm against the migration guide.
DOWNTIME = false
# Written as `change` with plain add_column calls only.
# NOTE(review): Rails should be able to reverse these automatically — confirm.
def change
# Binary column; IndexStatus declares it with `sha_attribute :last_wiki_commit`.
add_column :index_statuses, :last_wiki_commit, :binary
# Nullable timestamp; no default is given here.
add_column :index_statuses, :wiki_indexed_at, :datetime_with_timezone
end
end
...@@ -1572,6 +1572,8 @@ ActiveRecord::Schema.define(version: 20190613030606) do ...@@ -1572,6 +1572,8 @@ ActiveRecord::Schema.define(version: 20190613030606) do
t.string "last_commit" t.string "last_commit"
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.binary "last_wiki_commit"
t.datetime_with_timezone "wiki_indexed_at"
t.index ["project_id"], name: "index_index_statuses_on_project_id", unique: true, using: :btree t.index ["project_id"], name: "index_index_statuses_on_project_id", unique: true, using: :btree
end end
......
...@@ -23,9 +23,9 @@ module Elastic ...@@ -23,9 +23,9 @@ module Elastic
self.__elasticsearch__.client self.__elasticsearch__.client
end end
def index_wiki_blobs def index_wiki_blobs(to_sha = nil)
if ::Gitlab::CurrentSettings.elasticsearch_experimental_indexer? if ::Gitlab::CurrentSettings.elasticsearch_experimental_indexer?
ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true) ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
else else
project.wiki.index_blobs project.wiki.index_blobs
end end
......
...@@ -15,20 +15,10 @@ module EE ...@@ -15,20 +15,10 @@ module EE
[::Gitlab.config.build_gitlab_kerberos_url, '/', full_path, '.git'].join('') [::Gitlab.config.build_gitlab_kerberos_url, '/', full_path, '.git'].join('')
end end
def update_elastic_index
index_wiki_blobs if project.use_elasticsearch?
end
def path_to_repo def path_to_repo
@path_to_repo ||= @path_to_repo ||=
File.join(::Gitlab.config.repositories.storages[project.repository_storage].legacy_disk_path, File.join(::Gitlab.config.repositories.storages[project.repository_storage].legacy_disk_path,
"#{disk_path}.git") "#{disk_path}.git")
end end
override :update_project_activity
def update_project_activity
update_elastic_index
super
end
end end
end end
# frozen_string_literal: true # frozen_string_literal: true
class IndexStatus < ApplicationRecord class IndexStatus < ApplicationRecord
include ::ShaAttribute
belongs_to :project belongs_to :project
sha_attribute :last_wiki_commit
validates :project_id, uniqueness: true, presence: true validates :project_id, uniqueness: true, presence: true
scope :for_project, ->(project_id) { where(project_id: project_id) } scope :for_project, ->(project_id) { where(project_id: project_id) }
......
# frozen_string_literal: true
module EE
  module Git
    # EE extension prepended onto ::Git::WikiPushService. After the base
    # service runs, it triggers an Elasticsearch wiki index update when the
    # push changed the wiki's default branch.
    module WikiPushService
      # Runs the base implementation, then inspects the pushed changes.
      #
      # Reads +params[:changes]+, which must respond to +each+ and yield
      # (oldrev, newrev, ref) triples. For every change whose ref is the
      # wiki's default *branch*, +index_wiki_blobs+ is called with the new
      # revision. Nothing happens when the project does not use Elasticsearch.
      #
      # Returns nil when Elasticsearch is not used for the project, otherwise
      # the result of iterating +params[:changes]+.
      def execute
        super

        return unless project.use_elasticsearch?

        wiki = project.wiki

        params[:changes].each do |_oldrev, newrev, ref|
          # Gitlab::Git.ref_name strips "refs/tags/" as well as "refs/heads/",
          # so comparing names alone would treat a tag called e.g. "master"
          # as a push to the default branch. Require a branch ref first.
          next unless ::Gitlab::Git.branch_ref?(ref)
          next unless ::Gitlab::Git.ref_name(ref) == wiki.default_branch

          wiki.index_wiki_blobs(newrev)
        end
      end
    end
  end
end
...@@ -21,17 +21,9 @@ module EE ...@@ -21,17 +21,9 @@ module EE
def process_wiki_changes(post_received) def process_wiki_changes(post_received)
super super
update_wiki_es_indexes(post_received)
if ::Gitlab::Geo.primary? if ::Gitlab::Geo.primary?
::Geo::RepositoryUpdatedService.new(post_received.project.wiki.repository).execute ::Geo::RepositoryUpdatedService.new(post_received.project.wiki.repository).execute
end end
end end
def update_wiki_es_indexes(post_received)
return unless post_received.project.use_elasticsearch?
post_received.project.wiki.index_wiki_blobs
end
end end
end end
---
title: Enable incremental Elasticsearch index updates for wikis
merge_request: 14057
author:
type: fixed
...@@ -42,7 +42,7 @@ module Gitlab ...@@ -42,7 +42,7 @@ module Gitlab
end end
# Use the eager-loaded association if available. # Use the eager-loaded association if available.
@index_status = project.index_status unless wiki? @index_status = project.index_status
end end
def run(to_sha = nil) def run(to_sha = nil)
...@@ -51,12 +51,12 @@ module Gitlab ...@@ -51,12 +51,12 @@ module Gitlab
head_commit = repository.try(:commit) head_commit = repository.try(:commit)
if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil? if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil?
update_index_status(Gitlab::Git::BLANK_SHA) unless wiki? update_index_status(Gitlab::Git::BLANK_SHA)
return return
end end
run_indexer!(to_sha) run_indexer!(to_sha)
update_index_status(to_sha) unless wiki? update_index_status(to_sha)
true true
end end
...@@ -96,7 +96,8 @@ module Gitlab ...@@ -96,7 +96,8 @@ module Gitlab
repository.delete_index_for_commits_and_blobs repository.delete_index_for_commits_and_blobs
end end
command = if wiki? command =
if wiki?
[path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path] [path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path]
else else
[path_to_indexer, project.id.to_s, repository_path] [path_to_indexer, project.id.to_s, repository_path]
...@@ -110,8 +111,12 @@ module Gitlab ...@@ -110,8 +111,12 @@ module Gitlab
end end
def last_commit def last_commit
if wiki?
index_status&.last_wiki_commit
else
index_status&.last_commit index_status&.last_commit
end end
end
def from_sha def from_sha
repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID
...@@ -150,7 +155,15 @@ module Gitlab ...@@ -150,7 +155,15 @@ module Gitlab
sha = head_commit.try(:sha) sha = head_commit.try(:sha)
sha ||= Gitlab::Git::BLANK_SHA sha ||= Gitlab::Git::BLANK_SHA
@index_status.update(last_commit: sha, indexed_at: Time.now)
attributes =
if wiki?
{ last_wiki_commit: sha, wiki_indexed_at: Time.now }
else
{ last_commit: sha, indexed_at: Time.now }
end
@index_status.update(attributes)
project.reload_index_status project.reload_index_status
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -32,6 +32,7 @@ describe 'Project elastic search', :js, :elastic do ...@@ -32,6 +32,7 @@ describe 'Project elastic search', :js, :elastic do
it 'finds wiki pages' do it 'finds wiki pages' do
project.wiki.create_page('test.md', 'Test searching for a wiki page') project.wiki.create_page('test.md', 'Test searching for a wiki page')
project.wiki.index_wiki_blobs
expect_search_result(scope: 'Wiki', term: 'Test', result: 'Test searching for a wiki page') expect_search_result(scope: 'Wiki', term: 'Test', result: 'Test searching for a wiki page')
end end
......
...@@ -38,14 +38,6 @@ describe Gitlab::Elastic::Indexer do ...@@ -38,14 +38,6 @@ describe Gitlab::Elastic::Indexer do
project.wiki.create_page('test.md', '# term') project.wiki.create_page('test.md', '# term')
end end
it 'does not ask for IndexStatus' do
expect(project).not_to receive(:index_status)
expect(project.wiki).not_to receive(:index_status)
expect_popen.and_return(popen_success)
indexer.run
end
it 'raises if it cannot find gitlab-elasticsearch-indexer' do it 'raises if it cannot find gitlab-elasticsearch-indexer' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(false) expect(described_class).to receive(:experimental_indexer_present?).and_return(false)
......
...@@ -40,7 +40,7 @@ describe ProjectWiki, :elastic do ...@@ -40,7 +40,7 @@ describe ProjectWiki, :elastic do
project.wiki.index_wiki_blobs project.wiki.index_wiki_blobs
end end
it 'indexes inside Rails if experiemntal indexer is not enabled' do it 'indexes inside Rails if experimental indexer is not enabled' do
stub_ee_application_setting(elasticsearch_experimental_indexer: false) stub_ee_application_setting(elasticsearch_experimental_indexer: false)
expect(project.wiki).to receive(:index_blobs) expect(project.wiki).to receive(:index_blobs)
...@@ -48,4 +48,23 @@ describe ProjectWiki, :elastic do ...@@ -48,4 +48,23 @@ describe ProjectWiki, :elastic do
project.wiki.index_wiki_blobs project.wiki.index_wiki_blobs
end end
# Checks that re-indexing the wiki up to a given commit removes a deleted
# page from the search results.
# NOTE(review): assumes a page 'omega_page' containing 'term2' is created and
# indexed by setup outside this example — confirm against the before block.
it 'can delete wiki pages' do
# Precondition: exactly one wiki blob currently matches 'term2'.
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
# Sidekiq inline mode, so indexing work enqueued inside this block is expected
# to run before the block returns.
Sidekiq::Testing.inline! do
project.wiki.find_page('omega_page').delete
# Head commit of the wiki repository after the deletion.
last_commit = project.wiki.repository.commit.sha
# Must be registered before index_wiki_blobs is called: the next Indexer
# instance has to be run with the post-deletion SHA, and the real
# implementation still executes.
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(last_commit).and_call_original
end
project.wiki.index_wiki_blobs(last_commit)
# Refresh the index before searching again.
Gitlab::Elastic::Helper.refresh_index
end
# Postcondition: the deleted page no longer matches.
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(0)
end
end end
# frozen_string_literal: true
require 'spec_helper'
# Covers the Elasticsearch side of Git::WikiPushService: a wiki index update
# is expected only when Elasticsearch is enabled and the pushed changes
# include the master ref.
describe Git::WikiPushService do
  include RepoHelpers

  let(:project) { create(:project, :repository, :wiki_repo) }
  let(:gl_repository) { "wiki-#{project.id}" }
  let(:key) { create(:key, user: project.owner) }
  let(:key_id) { key.shell_id }
  let(:post_received) { ::Gitlab::GitPostReceive.new(project, key_id, changes, {}) }

  # Runs the service as the project owner with the refs parsed from the
  # `changes` string defined by the surrounding context.
  def push_wiki_changes
    refs = post_received.enum_for(:changes_refs)

    described_class.new(project, project.owner, changes: refs).execute
  end

  before do
    allow(post_received).to receive(:identify).and_return(project.owner)
  end

  context 'when elasticsearch is enabled' do
    before do
      stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
    end

    context 'when changes include master ref' do
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag\n423423 797823 refs/heads/master" }

      it 'triggers a wiki update' do
        # "797823" is the new revision of the master change above.
        expect(project.wiki).to receive(:index_wiki_blobs).with("797823")

        push_wiki_changes
      end
    end

    context 'when changes do not include master ref' do
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag" }

      it 'does not trigger a wiki update' do
        expect(project.wiki).not_to receive(:index_wiki_blobs)

        push_wiki_changes
      end
    end
  end

  context 'when elasticsearch is disabled' do
    before do
      stub_ee_application_setting(elasticsearch_search: false, elasticsearch_indexing: false)
    end

    context 'when changes include master ref' do
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag\n423423 797823 refs/heads/master" }

      it 'does nothing even if changes include master ref' do
        expect(project.wiki).not_to receive(:index_wiki_blobs)

        push_wiki_changes
      end
    end
  end
end
...@@ -3,8 +3,10 @@ require 'spec_helper' ...@@ -3,8 +3,10 @@ require 'spec_helper'
describe PostReceive do describe PostReceive do
let(:changes) { "123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag" } let(:changes) { "123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag" }
let(:changes_with_master) { "#{changes}\n423423 797823 refs/heads/master" }
let(:wrongly_encoded_changes) { changes.encode("ISO-8859-1").force_encoding("UTF-8") } let(:wrongly_encoded_changes) { changes.encode("ISO-8859-1").force_encoding("UTF-8") }
let(:base64_changes) { Base64.encode64(wrongly_encoded_changes) } let(:base64_changes) { Base64.encode64(wrongly_encoded_changes) }
let(:base64_changes_with_master) { Base64.encode64(changes_with_master) }
let(:gl_repository) { "project-#{project.id}" } let(:gl_repository) { "project-#{project.id}" }
let(:key) { create(:key, user: project.owner) } let(:key) { create(:key, user: project.owner) }
let(:key_id) { key.shell_id } let(:key_id) { key.shell_id }
...@@ -69,11 +71,19 @@ describe PostReceive do ...@@ -69,11 +71,19 @@ describe PostReceive do
described_class.new.perform(gl_repository, key_id, base64_changes) described_class.new.perform(gl_repository, key_id, base64_changes)
end end
it 'triggers wiki index update when ElasticSearch is enabled', :elastic do it 'triggers wiki index update when ElasticSearch is enabled and pushed to master', :elastic do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true) stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs) expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end
it 'does not trigger wiki index update when Elasticsearch is enabled and not pushed to master', :elastic do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
expect_any_instance_of(ProjectWiki).not_to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes) described_class.new.perform(gl_repository, key_id, base64_changes)
end end
...@@ -90,7 +100,7 @@ describe PostReceive do ...@@ -90,7 +100,7 @@ describe PostReceive do
it 'does not trigger wiki index update' do it 'does not trigger wiki index update' do
expect_any_instance_of(ProjectWiki).not_to receive(:index_wiki_blobs) expect_any_instance_of(ProjectWiki).not_to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes) described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end end
end end
...@@ -102,23 +112,25 @@ describe PostReceive do ...@@ -102,23 +112,25 @@ describe PostReceive do
it 'triggers wiki index update' do it 'triggers wiki index update' do
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs) expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes) described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end end
end end
context 'when a group is enabled' do context 'when a group is enabled' do
let(:user) { create(:user) }
let(:group) { create(:group) } let(:group) { create(:group) }
let(:project) { create(:project, :wiki_repo, group: group) } let(:project) { create(:project, :wiki_repo, group: group) }
let(:key) { create(:key, user: group.owner) } let(:key) { create(:key, user: user) }
before do before do
create :elasticsearch_indexed_namespace, namespace: group create :elasticsearch_indexed_namespace, namespace: group
group.add_owner(user)
end end
it 'triggers wiki index update' do it 'triggers wiki index update' do
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs) expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes) described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment