Commit e6f21f01 authored by Nick Thomas

Merge branch '6053-geo-slow-join-query-with-project_repository_states' into 'master'

Resolve "Geo: Slow JOIN query with project_repository_states"

Closes #6053

See merge request gitlab-org/gitlab-ee!5879
parents 53a6b2d9 5a59503c
......@@ -2134,6 +2134,7 @@ ActiveRecord::Schema.define(version: 20180612175636) do
add_index "projects", ["created_at"], name: "index_projects_on_created_at", using: :btree
add_index "projects", ["creator_id"], name: "index_projects_on_creator_id", using: :btree
add_index "projects", ["description"], name: "index_projects_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"}
add_index "projects", ["id", "repository_storage", "last_repository_updated_at"], name: "idx_projects_on_repository_storage_last_repository_updated_at", using: :btree
add_index "projects", ["id"], name: "index_projects_on_id_partial_for_visibility", unique: true, where: "(visibility_level = ANY (ARRAY[10, 20]))", using: :btree
add_index "projects", ["id"], name: "index_projects_on_mirror_and_mirror_trigger_builds_both_true", where: "((mirror IS TRUE) AND (mirror_trigger_builds IS TRUE))", using: :btree
add_index "projects", ["last_activity_at"], name: "index_projects_on_last_activity_at", using: :btree
......
module Geo
class RepositoryVerificationFinder
# Creates a finder that can optionally be scoped to one repository storage.
#
# shard_name - value compared against projects.repository_storage by
#              apply_shard_restriction; when it is not present (nil is the
#              default) the queries are left unrestricted.
def initialize(shard_name: nil)
@shard_name = shard_name
end
# Returns at most +batch_size+ projects whose repository or wiki
# verification is outdated, ordered by last_repository_updated_at
# (ascending, using the NULLS LAST ordering helper).
#
# The candidate rows are produced by build_query_to_find_outdated_projects
# and wrapped in a CTE named +outdated_projects+. The CTE is then aliased to
# the projects table, so the outer ORDER BY / LIMIT only see the rows the
# CTE selected.
#
# batch_size - maximum number of projects to return.
#
# Returns a relation built on Project; nothing is loaded until the caller
# enumerates it.
def find_outdated_projects(batch_size:)
  query = build_query_to_find_outdated_projects(batch_size: batch_size)
  cte = Gitlab::SQL::CTE.new(:outdated_projects, query)

  Project.with(cte.to_arel)
    .from(cte.alias_to(projects_table))
    .order(last_repository_updated_at_asc)
    .limit(batch_size)
end
# Returns at most +batch_size+ projects matching the
# repository_never_verified condition. Project repository states are
# LEFT OUTER JOINed, so projects without a state row can match as well.
#
# batch_size - maximum number of projects to return.
#
# When a shard name is present the relation is narrowed further by
# apply_shard_restriction.
def find_unverified_projects(batch_size:)
  unverified =
    Project
      .select(:id)
      .with_route
      .joins(left_join_repository_state)
      .where(repository_never_verified)
      .limit(batch_size)

  return unverified unless shard_name.present?

  apply_shard_restriction(unverified)
end
def count_verified_repositories
......@@ -33,7 +41,21 @@ module Geo
Project.verification_failed_wikis.count
end
# Internal helpers start here. A single visibility marker is enough: a
# `protected` immediately followed by `private` has no effect.
private

attr_reader :shard_name
# Builds the Arel query that selects outdated projects.
#
# Projects are inner-joined with their repository state, filtered to rows
# where the repository or the wiki is outdated, and capped at +batch_size+
# rows. Only the project id and last_repository_updated_at are projected.
#
# batch_size - maximum number of rows the query may return.
#
# When a shard name is present the query is narrowed further by
# apply_shard_restriction.
def build_query_to_find_outdated_projects(batch_size:)
  selected_columns = [
    projects_table[:id],
    projects_table[:last_repository_updated_at]
  ]

  outdated =
    projects_table
      .join(repository_state_table).on(project_id_matcher)
      .project(*selected_columns)
      .where(repository_outdated.or(wiki_outdated))
      .take(batch_size)

  return outdated unless shard_name.present?

  apply_shard_restriction(outdated)
end
def projects_table
Project.arel_table
......@@ -43,10 +65,14 @@ module Geo
ProjectRepositoryState.arel_table
end
# Arel equality node matching a project to its repository state row:
# projects.id = <repository state table>.project_id.
def project_id_matcher
  project_id = projects_table[:id]
  state_project_id = repository_state_table[:project_id]

  project_id.eq(state_project_id)
end
# Join sources for a LEFT OUTER JOIN from projects to their repository
# state, suitable for passing to ActiveRecord's +joins+.
#
# The ON condition is set exactly once, through project_id_matcher, so this
# join and the inner join in build_query_to_find_outdated_projects share a
# single definition of how the two tables relate.
def left_join_repository_state
  projects_table
    .join(repository_state_table, Arel::Nodes::OuterJoin)
    .on(project_id_matcher)
    .join_sources
end
......@@ -67,5 +93,9 @@ module Geo
# ORDER BY fragment for projects.last_repository_updated_at, ascending,
# produced by the database helper for NULLS LAST ordering.
def last_repository_updated_at_asc
  column = 'projects.last_repository_updated_at'
  direction = 'ASC'

  Gitlab::Database.nulls_last_order(column, direction)
end
# Narrows +relation+ to projects stored on the configured shard.
#
# relation - anything responding to +where+ with an Arel condition; in this
#            class both an ActiveRecord relation and an Arel query are
#            passed in.
#
# Returns whatever +relation.where+ returns.
def apply_shard_restriction(relation)
  on_configured_shard = projects_table[:repository_storage].eq(shard_name)

  relation.where(on_configured_shard)
end
end
end
......@@ -36,7 +36,7 @@ module Geo
end
# Memoized verification finder scoped to this worker's shard.
#
# The finder must be built with +shard_name+ on first use: once @finder is
# memoized, a later `||=` never runs, so an unscoped instance would stay
# in place and shard filtering would be skipped.
def finder
  @finder ||= Geo::RepositoryVerificationFinder.new(shard_name: shard_name)
end
def load_pending_resources
......@@ -51,17 +51,11 @@ module Geo
end
# Ids of at most +batch_size+ projects that have never been verified.
#
# The finder result is plucked exactly once; building and plucking a second
# relation whose result is thrown away would only cost an extra query.
#
# batch_size - maximum number of ids to return.
def find_unverified_project_ids(batch_size:)
  finder.find_unverified_projects(batch_size: batch_size).pluck(:id)
end
# Ids of at most +batch_size+ projects whose verification is outdated.
#
# The relation returned by the finder is used as-is. It reads from a CTE
# that only exposes +id+ and +last_repository_updated_at+, so it cannot be
# filtered on +repository_storage+ here; any shard filtering has to happen
# inside the finder.
#
# batch_size - maximum number of ids to return.
def find_outdated_project_ids(batch_size:)
  finder.find_outdated_projects(batch_size: batch_size).pluck(:id)
end
# Narrows +relation+ to projects stored on this worker's shard.
#
# relation - relation responding to +where+ with a column/value hash.
#
# Returns the narrowed relation, so callers can keep chaining (for example
# +pluck+). The method must not reference +batch_size+, which is not
# defined in this scope.
def shard_restriction(relation)
  relation.where(repository_storage: shard_name)
end
end
end
......
---
title: Geo - Optimize query to return outdated projects that need to be reverified
merge_request: 5879
author:
type: performance
# Adds an index on projects (id, repository_storage,
# last_repository_updated_at) to support the Geo query that looks up
# outdated projects for re-verification.
class AddIndexToProjectsOnRepositoryStorageLastRepositoryUpdatedAt < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  INDEX_NAME = 'idx_projects_on_repository_storage_last_repository_updated_at'.freeze

  # NOTE(review): presumably required because the concurrent index helpers
  # cannot run inside the migration's DDL transaction; confirm against the
  # migration helper documentation.
  disable_ddl_transaction!

  # Creates the index through the concurrent helper.
  def up
    indexed_columns = %i[id repository_storage last_repository_updated_at]

    add_concurrent_index(:projects, indexed_columns, name: INDEX_NAME)
  end

  # Drops the index by name.
  def down
    remove_concurrent_index_by_name(:projects, INDEX_NAME)
  end
end
......@@ -53,12 +53,41 @@ describe Geo::RepositoryVerificationFinder, :postgresql do
expect(subject.find_outdated_projects(batch_size: 10)).to eq [less_active_project, project]
end
# A finder scoped to the shard of `project` must ignore outdated projects
# that live on a different shard.
context 'with shard restriction' do
  subject { described_class.new(shard_name: project.repository_storage) }

  it 'does not return projects on other shards' do
    # update_column is used so the storage name can be changed to 'other'
    # directly on the row.
    off_shard_project = create(:project)
    off_shard_project.update_column(:repository_storage, 'other')

    # Both projects are outdated; only the shard differs.
    [project, off_shard_project].each do |outdated_project|
      create(:repository_state, :repository_outdated, project: outdated_project)
    end

    outdated = subject.find_outdated_projects(batch_size: 10)

    expect(outdated).to match_array(project)
  end
end
end
describe '#find_unverified_projects' do
  it 'returns projects that never have been verified' do
    # Repository states created through the outdated traits are expected
    # to be excluded, leaving only `project`.
    %i[repository_outdated wiki_outdated].each do |outdated_trait|
      create(:repository_state, outdated_trait)
    end

    unverified = subject.find_unverified_projects(batch_size: 10)

    expect(unverified).to match_array(project)
  end

  # A finder scoped to the shard of `project` must ignore unverified
  # projects that live on a different shard.
  context 'with shard restriction' do
    subject { described_class.new(shard_name: project.repository_storage) }

    it 'does not return projects on other shards' do
      off_shard_project = create(:project)
      off_shard_project.update_column(:repository_storage, 'other')

      unverified = subject.find_unverified_projects(batch_size: 10)

      expect(unverified).to match_array(project)
    end
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment