Commit 94a781d6 authored by Francisco Javier López's avatar Francisco Javier López

Merge branch 'vij-backfill-dp-stats' into 'master'

Backfill dependency proxy size in namespace stats

See merge request gitlab-org/gitlab!79937
parents 633ae56b e5289bec
# frozen_string_literal: true
class BackfillNamespaceStatisticsWithDependencyProxySize < Gitlab::Database::Migration[1.0]
DELAY_INTERVAL = 2.minutes.to_i
BATCH_SIZE = 500
MIGRATION = 'PopulateNamespaceStatistics'
disable_ddl_transaction!
def up
groups = exec_query <<~SQL
SELECT dependency_proxy_manifests.group_id FROM dependency_proxy_manifests
UNION
SELECT dependency_proxy_blobs.group_id from dependency_proxy_blobs
SQL
groups.rows.flatten.in_groups_of(BATCH_SIZE, false).each_with_index do |group_ids, index|
migrate_in(index * DELAY_INTERVAL, MIGRATION, [group_ids, [:dependency_proxy_size]])
end
end
def down
# no-op
end
end
bce595c1c6587e785bc49d6e5a7181b5cc0164f2201375ad82d4bd19c217cd35
\ No newline at end of file
......@@ -3,42 +3,14 @@
module EE
module Gitlab
module BackgroundMigration
# This class creates/updates those namespace statistics
# that haven't been created nor initialized.
# It also updates the related namespace statistics
module PopulateNamespaceStatistics
extend ::Gitlab::Utils::Override
override :perform
def perform(group_ids, statistics)
# Updating group statistics might involve calling Gitaly.
# For example, when calculating `wiki_size`, we will need
# to perform the request to check if the repo exists and
# also the repository size.
#
# The `allow_n_plus_1_calls` method is only intended for
# dev and test. It won't be raised in prod.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
::Group.includes(:route, :namespace_statistics, group_wiki_repository: :shard).where(id: group_ids).each do |group|
upsert_namespace_statistics(group, statistics)
end
end
end
private
def upsert_namespace_statistics(group, statistics)
response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute
error_message("#{response.message} group: #{group.id}") if response.error?
end
def logger
@logger ||= ::Gitlab::BackgroundMigration::Logger.build
end
def error_message(message)
logger.error(message: "Namespace Statistics Migration: #{message}")
override :relation
def relation(group_ids)
::Group.includes(:route, :namespace_statistics, group_wiki_repository: :shard).where(id: group_ids)
end
end
end
......
......@@ -15,12 +15,9 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
let(:repo_size) { 123456 }
let(:expected_repo_size) { repo_size.megabytes }
let(:ids) { namespaces.pluck(:id) }
let(:migration) { described_class.new }
let(:statistics) { [] }
subject do
migration.perform(ids, statistics)
end
subject(:perform) { described_class.new.perform(ids, statistics) }
before do
allow_next(Repository).to receive(:size).and_return(repo_size)
......@@ -28,7 +25,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
context 'when group wikis are not enabled' do
it 'does not update wiki stats' do
subject
perform
expect(namespace_statistics.where(wiki_size: 0).count).to eq 2
end
......@@ -42,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group1.id)
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group2.id)
subject
perform
expect(namespace_statistics.count).to eq 2
......@@ -58,7 +55,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
it 'calls the statistics update service with just that stat' do
expect(Groups::UpdateStatisticsService).to receive(:new).with(anything, statistics: [:wiki_size]).twice.and_call_original
subject
perform
end
end
end
......@@ -5,9 +5,40 @@ module Gitlab
# This class creates/updates those namespace statistics
# that haven't been created nor initialized.
# It also updates the related namespace statistics
# This is only required in EE
class PopulateNamespaceStatistics
def perform(group_ids, statistics)
# Updating group statistics might involve calling Gitaly.
# For example, when calculating `wiki_size`, we will need
# to perform the request to check if the repo exists and
# also the repository size.
#
# The `allow_n_plus_1_calls` method is only intended for
# dev and test. It won't be raised in prod.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
relation(group_ids).each do |group|
upsert_namespace_statistics(group, statistics)
end
end
end
private
def upsert_namespace_statistics(group, statistics)
response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute
error_message("#{response.message} group: #{group.id}") if response.error?
end
def logger
@logger ||= ::Gitlab::BackgroundMigration::Logger.build
end
def error_message(message)
logger.error(message: "Namespace Statistics Migration: #{message}")
end
def relation(group_ids)
Group.includes(:namespace_statistics).where(id: group_ids)
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
let_it_be(:namespaces) { table(:namespaces) }
let_it_be(:namespace_statistics) { table(:namespace_statistics) }
let_it_be(:dependency_proxy_manifests) { table(:dependency_proxy_manifests) }
let_it_be(:dependency_proxy_blobs) { table(:dependency_proxy_blobs) }
let!(:group1) { namespaces.create!(id: 10, type: 'Group', name: 'group1', path: 'group1') }
let!(:group2) { namespaces.create!(id: 20, type: 'Group', name: 'group2', path: 'group2') }
let!(:group1_manifest) do
dependency_proxy_manifests.create!(group_id: 10, size: 20, file_name: 'test-file', file: 'test', digest: 'abc123')
end
let!(:group2_manifest) do
dependency_proxy_manifests.create!(group_id: 20, size: 20, file_name: 'test-file', file: 'test', digest: 'abc123')
end
let!(:group1_stats) { namespace_statistics.create!(id: 10, namespace_id: 10) }
let(:ids) { namespaces.pluck(:id) }
let(:statistics) { [] }
subject(:perform) { described_class.new.perform(ids, statistics) }
it 'creates/updates all namespace_statistics and updates root storage statistics', :aggregate_failures do
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group1.id)
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group2.id)
expect { perform }.to change(namespace_statistics, :count).from(1).to(2)
namespace_statistics.all.each do |stat|
expect(stat.dependency_proxy_size).to eq 20
expect(stat.storage_size).to eq 20
end
end
context 'when just a stat is passed' do
let(:statistics) { [:dependency_proxy_size] }
it 'calls the statistics update service with just that stat' do
expect(Groups::UpdateStatisticsService)
.to receive(:new)
.with(anything, statistics: [:dependency_proxy_size])
.twice.and_call_original
perform
end
end
context 'when a statistics update fails' do
before do
error_response = instance_double(ServiceResponse, message: 'an error', error?: true)
allow_next_instance_of(Groups::UpdateStatisticsService) do |instance|
allow(instance).to receive(:execute).and_return(error_response)
end
end
it 'logs an error' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:error).twice
end
perform
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe BackfillNamespaceStatisticsWithDependencyProxySize do
let_it_be(:groups) { table(:namespaces) }
let_it_be(:group1) { groups.create!(id: 10, name: 'test1', path: 'test1', type: 'Group') }
let_it_be(:group2) { groups.create!(id: 20, name: 'test2', path: 'test2', type: 'Group') }
let_it_be(:group3) { groups.create!(id: 30, name: 'test3', path: 'test3', type: 'Group') }
let_it_be(:group4) { groups.create!(id: 40, name: 'test4', path: 'test4', type: 'Group') }
let_it_be(:dependency_proxy_blobs) { table(:dependency_proxy_blobs) }
let_it_be(:dependency_proxy_manifests) { table(:dependency_proxy_manifests) }
let_it_be(:group1_manifest) { create_manifest(10, 10) }
let_it_be(:group2_manifest) { create_manifest(20, 20) }
let_it_be(:group3_manifest) { create_manifest(30, 30) }
let_it_be(:group1_blob) { create_blob(10, 10) }
let_it_be(:group2_blob) { create_blob(20, 20) }
let_it_be(:group3_blob) { create_blob(30, 30) }
describe '#up' do
it 'correctly schedules background migrations' do
stub_const("#{described_class}::BATCH_SIZE", 2)
Sidekiq::Testing.fake! do
freeze_time do
migrate!
aggregate_failures do
expect(described_class::MIGRATION)
.to be_scheduled_migration([10, 30], ['dependency_proxy_size'])
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, [20], ['dependency_proxy_size'])
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
end
def create_manifest(group_id, size)
dependency_proxy_manifests.create!(
group_id: group_id,
size: size,
file_name: 'test-file',
file: 'test',
digest: 'abc123'
)
end
def create_blob(group_id, size)
dependency_proxy_blobs.create!(
group_id: group_id,
size: size,
file_name: 'test-file',
file: 'test'
)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment