Commit aeb754af authored by Yannis Roussos's avatar Yannis Roussos

Merge branch '246489-fix-files-count-for-large-mr-diffs' into 'master'

Fix MR diff file counts for some historic data

See merge request gitlab-org/gitlab!41676
parents 8f027931 e646b94a
...@@ -17,6 +17,10 @@ class MergeRequestDiff < ApplicationRecord ...@@ -17,6 +17,10 @@ class MergeRequestDiff < ApplicationRecord
# diffs to external storage # diffs to external storage
EXTERNAL_DIFF_CUTOFF = 7.days.freeze EXTERNAL_DIFF_CUTOFF = 7.days.freeze
# The files_count column is a 2-byte signed integer. Look up the true value
# from the database if this sentinel is seen
FILES_COUNT_SENTINEL = 2**15 - 1
belongs_to :merge_request belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff manual_inverse_association :merge_request, :merge_request_diff
...@@ -202,6 +206,17 @@ class MergeRequestDiff < ApplicationRecord ...@@ -202,6 +206,17 @@ class MergeRequestDiff < ApplicationRecord
end end
end end
def files_count
db_value = read_attribute(:files_count)
case db_value
when nil, FILES_COUNT_SENTINEL
merge_request_diff_files.count
else
db_value
end
end
# This method will rely on repository branch sha # This method will rely on repository branch sha
# in case start_commit_sha is nil. Its necesarry for old merge request diff # in case start_commit_sha is nil. Its necesarry for old merge request diff
# created before version 8.4 to work # created before version 8.4 to work
...@@ -423,7 +438,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -423,7 +438,7 @@ class MergeRequestDiff < ApplicationRecord
# external storage. If external storage isn't an option for this diff, the # external storage. If external storage isn't an option for this diff, the
# method is a no-op. # method is a no-op.
def migrate_files_to_external_storage! def migrate_files_to_external_storage!
return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0 return if stored_externally? || !use_external_diff? || files_count == 0
rows = build_merge_request_diff_files(merge_request_diff_files) rows = build_merge_request_diff_files(merge_request_diff_files)
rows = build_external_merge_request_diff_files(rows) rows = build_external_merge_request_diff_files(rows)
...@@ -449,7 +464,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -449,7 +464,7 @@ class MergeRequestDiff < ApplicationRecord
# If this diff isn't in external storage, the method is a no-op. # If this diff isn't in external storage, the method is a no-op.
def migrate_files_to_database! def migrate_files_to_database!
return unless stored_externally? return unless stored_externally?
return if merge_request_diff_files.count == 0 return if files_count == 0
rows = convert_external_diffs_to_database rows = convert_external_diffs_to_database
...@@ -666,7 +681,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -666,7 +681,7 @@ class MergeRequestDiff < ApplicationRecord
def set_count_columns def set_count_columns
update_columns( update_columns(
commits_count: merge_request_diff_commits.size, commits_count: merge_request_diff_commits.size,
files_count: merge_request_diff_files.size files_count: [FILES_COUNT_SENTINEL, merge_request_diff_files.size].min
) )
end end
......
---
title: Fix MR diff file counts for some historic data
merge_request: 41676
author:
type: fixed
...@@ -4,13 +4,18 @@ module Gitlab ...@@ -4,13 +4,18 @@ module Gitlab
module BackgroundMigration module BackgroundMigration
# Sets the MergeRequestDiff#files_count value for old rows # Sets the MergeRequestDiff#files_count value for old rows
class SetMergeRequestDiffFilesCount class SetMergeRequestDiffFilesCount
COUNT_SUBQUERY = <<~SQL # Some historic data has a *lot* of files. Apply a sentinel to these cases
FILES_COUNT_SENTINEL = 2**15 - 1
def self.count_subquery
<<~SQL
files_count = ( files_count = (
SELECT count(*) SELECT LEAST(#{FILES_COUNT_SENTINEL}, count(*))
FROM merge_request_diff_files FROM merge_request_diff_files
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id
) )
SQL SQL
end
class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation
include EachBatch include EachBatch
...@@ -20,7 +25,7 @@ module Gitlab ...@@ -20,7 +25,7 @@ module Gitlab
def perform(start_id, end_id) def perform(start_id, end_id)
MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation| MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation|
relation.update_all(COUNT_SUBQUERY) relation.update_all(self.class.count_subquery)
end end
end end
end end
......
...@@ -13,11 +13,11 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem ...@@ -13,11 +13,11 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
let(:project) { projects.create!(namespace_id: namespace.id) } let(:project) { projects.create!(namespace_id: namespace.id) }
let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) } let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) }
it 'fills the files_count column' do let!(:empty_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
empty_diff = merge_request_diffs.create!(merge_request_id: merge_request.id) let!(:filled_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
filled_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
3.times do |n| let!(:filled_diff_files) do
1.upto(3).map do |n|
merge_request_diff_files.create!( merge_request_diff_files.create!(
merge_request_diff_id: filled_diff.id, merge_request_diff_id: filled_diff.id,
relative_order: n, relative_order: n,
...@@ -31,10 +31,21 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem ...@@ -31,10 +31,21 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
new_path: '' new_path: ''
) )
end end
end
it 'fills the files_count column' do
described_class.new.perform(empty_diff.id, filled_diff.id) described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0) expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(3) expect(filled_diff.reload.files_count).to eq(3)
end end
it 'uses the sentinel value if the actual count is too high' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", filled_diff_files.size - 1)
described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(described_class::FILES_COUNT_SENTINEL)
end
end end
...@@ -293,6 +293,7 @@ RSpec.describe MergeRequestDiff do ...@@ -293,6 +293,7 @@ RSpec.describe MergeRequestDiff do
it 'does nothing with an empty diff' do it 'does nothing with an empty diff' do
stub_external_diffs_setting(enabled: true) stub_external_diffs_setting(enabled: true)
MergeRequestDiffFile.where(merge_request_diff_id: diff.id).delete_all MergeRequestDiffFile.where(merge_request_diff_id: diff.id).delete_all
diff.save! # update files_count
expect(diff).not_to receive(:update!) expect(diff).not_to receive(:update!)
...@@ -675,8 +676,39 @@ RSpec.describe MergeRequestDiff do ...@@ -675,8 +676,39 @@ RSpec.describe MergeRequestDiff do
end end
describe '#files_count' do describe '#files_count' do
it 'returns number of diff files' do let_it_be(:merge_request) { create(:merge_request) }
expect(diff_with_commits.files_count).to eq(diff_with_commits.merge_request_diff_files.count)
let(:diff) { merge_request.merge_request_diff }
let(:actual_count) { diff.merge_request_diff_files.count }
it 'is set by default' do
expect(diff.read_attribute(:files_count)).to eq(actual_count)
end
it 'is set to the sentinel value if the actual value exceeds it' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", actual_count - 1)
diff.save! # update the files_count column with the stub in place
expect(diff.read_attribute(:files_count)).to eq(described_class::FILES_COUNT_SENTINEL)
end
it 'uses the cached count if present' do
diff.update_columns(files_count: actual_count + 1)
expect(diff.files_count).to eq(actual_count + 1)
end
it 'uses the actual count if nil' do
diff.update_columns(files_count: nil)
expect(diff.files_count).to eq(actual_count)
end
it 'uses the actual count if overflown' do
diff.update_columns(files_count: described_class::FILES_COUNT_SENTINEL)
expect(diff.files_count).to eq(actual_count)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment