Commit 991bf24e authored by Sean McGivern's avatar Sean McGivern

Use latest_merge_request_diff association

Compared to the merge_request_diff association:

1. It's simpler to query. The query uses a foreign key to the
   merge_request_diffs table, so no ordering is necessary.
2. It's faster for preloading. The merge_request_diff association has to load
   every diff for the MRs in the set, then discard all but the most recent for
   each. This association means that Rails can just query for N diffs from N
   MRs.
3. It's more complicated to update. This is a bidirectional foreign key, so we
   need to update two tables when adding a diff record. This also means we need
   to handle this as a special case when importing a GitLab project.

There is some juggling with this association in the merge request model:

* `MergeRequest#latest_merge_request_diff` is _always_ the latest diff.
* `MergeRequest#merge_request_diff` reuses
  `MergeRequest#latest_merge_request_diff` unless:
    * Arguments are passed. These are typically to force-reload the association.
    * It doesn't exist. That means we might be trying to implicitly create a
      diff. This only seems to happen in specs.
    * The association is already loaded. This is important for the reasons
      explained in the comment, which I'll reiterate here: if we a) load a
      non-latest diff, then b) get its `merge_request`, then c) get that MR's
      `merge_request_diff`, we should get the diff we loaded in c), even though
      that's not the latest diff.

Basically, `MergeRequest#merge_request_diff` is the latest diff in most cases,
but not quite all.
parent e548c613
......@@ -150,7 +150,7 @@ module IssuableCollections
when 'MergeRequest'
[
:source_project, :target_project, :author, :assignee, :labels, :milestone,
head_pipeline: :project, target_project: :namespace, merge_request_diff: :merge_request_diff_commits
head_pipeline: :project, target_project: :namespace, latest_merge_request_diff: :merge_request_diff_commits
]
end
end
......
......@@ -243,7 +243,7 @@ module Ci
@merge_request ||=
begin
merge_requests = MergeRequest.includes(:merge_request_diff)
merge_requests = MergeRequest.includes(:latest_merge_request_diff)
.where(source_branch: ref,
source_project: pipeline.project)
.reorder(iid: :desc)
......
module ManualInverseAssociation
extend ActiveSupport::Concern
module ClassMethods
def manual_inverse_association(association, inverse)
define_method(association) do |*args|
super(*args).tap do |value|
next unless value
child_association = value.association(inverse)
child_association.set_inverse_instance(self)
child_association.target = self
end
end
end
end
end
......@@ -5,6 +5,8 @@ class MergeRequest < ActiveRecord::Base
include Referable
include IgnorableColumn
include TimeTrackable
include ManualInverseAssociation
include EachBatch
ignore_column :locked_at,
:ref_fetched
......@@ -14,9 +16,28 @@ class MergeRequest < ActiveRecord::Base
belongs_to :merge_user, class_name: "User"
has_many :merge_request_diffs
has_one :merge_request_diff,
-> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request
belongs_to :latest_merge_request_diff, class_name: 'MergeRequestDiff'
manual_inverse_association :latest_merge_request_diff, :merge_request
# This is the same as latest_merge_request_diff unless:
# 1. There are arguments - in which case we might be trying to force-reload.
# 2. This association is already loaded.
# 3. The latest diff does not exist.
#
# The second one in particular is important - MergeRequestDiff#merge_request
# is the inverse of MergeRequest#merge_request_diff, which means it may not be
# the latest diff, because we could have loaded any diff from this particular
# MR. If we haven't already loaded a diff, then it's fine to load the latest.
def merge_request_diff(*args)
fallback = latest_merge_request_diff if args.empty? && !association(:merge_request_diff).loaded?
fallback || super
end
belongs_to :head_pipeline, foreign_key: "head_pipeline_id", class_name: "Ci::Pipeline"
has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
......@@ -167,6 +188,22 @@ class MergeRequest < ActiveRecord::Base
where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
end
# This is used after project import, to reset the IDs to the correct
# values. It is not intended to be called without having already scoped the
# relation.
def self.set_latest_merge_request_diff_ids!
update = '
latest_merge_request_diff_id = (
SELECT MAX(id)
FROM merge_request_diffs
WHERE merge_requests.id = merge_request_diffs.merge_request_id
)'.squish
self.each_batch do |batch|
batch.update_all(update)
end
end
WIP_REGEX = /\A\s*(\[WIP\]\s*|WIP:\s*|WIP\s+)+\s*/i.freeze
def self.work_in_progress?(title)
......
......@@ -2,6 +2,7 @@ class MergeRequestDiff < ActiveRecord::Base
include Sortable
include Importable
include Gitlab::EncodingHelper
include ManualInverseAssociation
# Prevent store of diff if commits amount more then 500
COMMITS_SAFE_SIZE = 100
......@@ -10,6 +11,8 @@ class MergeRequestDiff < ActiveRecord::Base
VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
......@@ -194,7 +197,7 @@ class MergeRequestDiff < ActiveRecord::Base
end
def latest?
self == merge_request.merge_request_diff
self.id == merge_request.latest_merge_request_diff_id
end
def compare_with(sha)
......
......@@ -35,7 +35,7 @@ module MergeRequests
# target branch manually
def close_merge_requests
commit_ids = @commits.map(&:id)
merge_requests = @project.merge_requests.preload(:merge_request_diff).opened.where(target_branch: @branch_name).to_a
merge_requests = @project.merge_requests.preload(:latest_merge_request_diff).opened.where(target_branch: @branch_name).to_a
merge_requests = merge_requests.select(&:diff_head_commit)
merge_requests = merge_requests.select do |merge_request|
......
---
title: Make finding most recent merge request diffs more efficient
merge_request:
author:
type: performance
# This is identical to the stolen background migration, which already has specs.
class PopulateMergeRequestsLatestMergeRequestDiffIdTakeTwo < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 1_000
class MergeRequest < ActiveRecord::Base
self.table_name = 'merge_requests'
include ::EachBatch
end
disable_ddl_transaction!
def up
Gitlab::BackgroundMigration.steal('PopulateMergeRequestsLatestMergeRequestDiffId')
update = '
latest_merge_request_diff_id = (
SELECT MAX(id)
FROM merge_request_diffs
WHERE merge_requests.id = merge_request_diffs.merge_request_id
)'.squish
MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation|
relation.update_all(update)
end
end
end
......@@ -21,7 +21,7 @@ module API
return merge_requests if args[:view] == 'simple'
merge_requests
.preload(:notes, :author, :assignee, :milestone, :merge_request_diff, :labels, :timelogs)
.preload(:notes, :author, :assignee, :milestone, :latest_merge_request_diff, :labels, :timelogs)
end
params :merge_requests_params do
......
......@@ -60,6 +60,8 @@ module Gitlab
end
end
@project.merge_requests.set_latest_merge_request_diff_ids!
@saved
end
......
......@@ -88,6 +88,7 @@ merge_requests:
- metrics
- timelogs
- head_pipeline
- latest_merge_request_diff
merge_request_diff:
- merge_request
- merge_request_diff_commits
......
......@@ -117,6 +117,12 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
expect(st_commits.first[:committed_date]).to be_kind_of(Time)
end
it 'has the correct data for merge request latest_merge_request_diff' do
MergeRequest.find_each do |merge_request|
expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id))
end
end
it 'has labels associated to label links, associated to issues' do
expect(Label.first.label_links.first.target).not_to be_nil
end
......
require 'spec_helper'
describe ManualInverseAssociation do
let(:model) do
Class.new(MergeRequest) do
belongs_to :manual_association, class_name: 'MergeRequestDiff', foreign_key: :latest_merge_request_diff_id
manual_inverse_association :manual_association, :merge_request
end
end
before do
stub_const("#{described_class}::Model", model)
end
let(:instance) { create(:merge_request).becomes(model) }
describe '.manual_inverse_association' do
context 'when the relation exists' do
before do
instance.create_merge_request_diff
instance.reload
end
it 'loads the relation' do
expect(instance.manual_association).to be_an_instance_of(MergeRequestDiff)
end
it 'does not perform extra queries after loading' do
instance.manual_association
expect { instance.manual_association.merge_request }
.not_to exceed_query_limit(0)
end
it 'passes arguments to the default association method, to allow reloading' do
query_count = ActiveRecord::QueryRecorder.new do
instance.manual_association
instance.manual_association(true)
end.count
expect(query_count).to eq(2)
end
end
context 'when the relation does not return a value' do
it 'does not try to set an inverse' do
expect(instance.manual_association).to be_nil
end
end
end
end
......@@ -18,8 +18,8 @@ describe MergeRequestDiff do
let!(:first_diff) { mr.merge_request_diff }
let!(:last_diff) { mr.create_merge_request_diff }
it { expect(last_diff.latest?).to be_truthy }
it { expect(first_diff.latest?).to be_falsey }
it { expect(last_diff.reload).to be_latest }
it { expect(first_diff.reload).not_to be_latest }
end
describe '#diffs' do
......@@ -29,7 +29,7 @@ describe MergeRequestDiff do
context 'when the :ignore_whitespace_change option is set' do
it 'creates a new compare object instead of loading from the DB' do
expect(mr_diff).not_to receive(:load_diffs)
expect(Gitlab::Git::Compare).to receive(:new).and_call_original
expect(mr_diff.compare).to receive(:diffs).and_call_original
mr_diff.raw_diffs(ignore_whitespace_change: true)
end
......
......@@ -79,6 +79,43 @@ describe MergeRequest do
end
end
describe '.set_latest_merge_request_diff_ids!' do
def create_merge_request_with_diffs(source_branch, diffs: 2)
params = {
target_project: project,
target_branch: 'master',
source_project: project,
source_branch: source_branch
}
create(:merge_request, params).tap do |mr|
diffs.times { mr.merge_request_diffs.create }
end
end
let(:project) { create(:project) }
it 'sets IDs for merge requests, whether they are already set or not' do
merge_requests = [
create_merge_request_with_diffs('feature'),
create_merge_request_with_diffs('feature-conflict'),
create_merge_request_with_diffs('wip', diffs: 0),
create_merge_request_with_diffs('csv')
]
merge_requests.take(2).each do |merge_request|
merge_request.update_column(:latest_merge_request_diff_id, nil)
end
expected = merge_requests.map do |merge_request|
merge_request.merge_request_diffs.maximum(:id)
end
expect { project.merge_requests.set_latest_merge_request_diff_ids! }
.to change { merge_requests.map { |mr| mr.reload.latest_merge_request_diff_id } }.to(expected)
end
end
describe '#target_branch_sha' do
let(:project) { create(:project, :repository) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment