Commit e5289bec authored by Vijay Hawoldar's avatar Vijay Hawoldar Committed by Francisco Javier López

Backfill dependency proxy size in namespace stats

We recently added support for storing dependency proxy size at the
namespace statistics level. This will backfill for all namespaces that
have those assets.

Changelog: other
parent 87458f6c
# frozen_string_literal: true
class BackfillNamespaceStatisticsWithDependencyProxySize < Gitlab::Database::Migration[1.0]
DELAY_INTERVAL = 2.minutes.to_i
BATCH_SIZE = 500
MIGRATION = 'PopulateNamespaceStatistics'
disable_ddl_transaction!
def up
groups = exec_query <<~SQL
SELECT dependency_proxy_manifests.group_id FROM dependency_proxy_manifests
UNION
SELECT dependency_proxy_blobs.group_id from dependency_proxy_blobs
SQL
groups.rows.flatten.in_groups_of(BATCH_SIZE, false).each_with_index do |group_ids, index|
migrate_in(index * DELAY_INTERVAL, MIGRATION, [group_ids, [:dependency_proxy_size]])
end
end
def down
# no-op
end
end
bce595c1c6587e785bc49d6e5a7181b5cc0164f2201375ad82d4bd19c217cd35
\ No newline at end of file
......@@ -3,42 +3,14 @@
module EE
module Gitlab
module BackgroundMigration
# This class creates/updates those namespace statistics
# that haven't been created nor initialized.
# It also updates the related namespace statistics
module PopulateNamespaceStatistics
extend ::Gitlab::Utils::Override
override :perform
def perform(group_ids, statistics)
# Updating group statistics might involve calling Gitaly.
# For example, when calculating `wiki_size`, we will need
# to perform the request to check if the repo exists and
# also the repository size.
#
# The `allow_n_plus_1_calls` method is only intended for
# dev and test. It won't be raised in prod.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
::Group.includes(:route, :namespace_statistics, group_wiki_repository: :shard).where(id: group_ids).each do |group|
upsert_namespace_statistics(group, statistics)
end
end
end
private
def upsert_namespace_statistics(group, statistics)
response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute
error_message("#{response.message} group: #{group.id}") if response.error?
end
def logger
@logger ||= ::Gitlab::BackgroundMigration::Logger.build
end
def error_message(message)
logger.error(message: "Namespace Statistics Migration: #{message}")
override :relation
def relation(group_ids)
::Group.includes(:route, :namespace_statistics, group_wiki_repository: :shard).where(id: group_ids)
end
end
end
......
......@@ -15,12 +15,9 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
let(:repo_size) { 123456 }
let(:expected_repo_size) { repo_size.megabytes }
let(:ids) { namespaces.pluck(:id) }
let(:migration) { described_class.new }
let(:statistics) { [] }
subject do
migration.perform(ids, statistics)
end
subject(:perform) { described_class.new.perform(ids, statistics) }
before do
allow_next(Repository).to receive(:size).and_return(repo_size)
......@@ -28,7 +25,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
context 'when group wikis are not enabled' do
it 'does not update wiki stats' do
subject
perform
expect(namespace_statistics.where(wiki_size: 0).count).to eq 2
end
......@@ -42,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group1.id)
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group2.id)
subject
perform
expect(namespace_statistics.count).to eq 2
......@@ -58,7 +55,7 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
it 'calls the statistics update service with just that stat' do
expect(Groups::UpdateStatisticsService).to receive(:new).with(anything, statistics: [:wiki_size]).twice.and_call_original
subject
perform
end
end
end
......@@ -5,9 +5,40 @@ module Gitlab
# This class creates/updates those namespace statistics
# that haven't been created nor initialized.
# It also updates the related namespace statistics
# This is only required in EE
class PopulateNamespaceStatistics
def perform(group_ids, statistics)
# Updating group statistics might involve calling Gitaly.
# For example, when calculating `wiki_size`, we will need
# to perform the request to check if the repo exists and
# also the repository size.
#
# The `allow_n_plus_1_calls` method is only intended for
# dev and test. It won't be raised in prod.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
relation(group_ids).each do |group|
upsert_namespace_statistics(group, statistics)
end
end
end
private
def upsert_namespace_statistics(group, statistics)
response = ::Groups::UpdateStatisticsService.new(group, statistics: statistics).execute
error_message("#{response.message} group: #{group.id}") if response.error?
end
def logger
@logger ||= ::Gitlab::BackgroundMigration::Logger.build
end
def error_message(message)
logger.error(message: "Namespace Statistics Migration: #{message}")
end
def relation(group_ids)
Group.includes(:namespace_statistics).where(id: group_ids)
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::PopulateNamespaceStatistics do
let_it_be(:namespaces) { table(:namespaces) }
let_it_be(:namespace_statistics) { table(:namespace_statistics) }
let_it_be(:dependency_proxy_manifests) { table(:dependency_proxy_manifests) }
let_it_be(:dependency_proxy_blobs) { table(:dependency_proxy_blobs) }
let!(:group1) { namespaces.create!(id: 10, type: 'Group', name: 'group1', path: 'group1') }
let!(:group2) { namespaces.create!(id: 20, type: 'Group', name: 'group2', path: 'group2') }
let!(:group1_manifest) do
dependency_proxy_manifests.create!(group_id: 10, size: 20, file_name: 'test-file', file: 'test', digest: 'abc123')
end
let!(:group2_manifest) do
dependency_proxy_manifests.create!(group_id: 20, size: 20, file_name: 'test-file', file: 'test', digest: 'abc123')
end
let!(:group1_stats) { namespace_statistics.create!(id: 10, namespace_id: 10) }
let(:ids) { namespaces.pluck(:id) }
let(:statistics) { [] }
subject(:perform) { described_class.new.perform(ids, statistics) }
it 'creates/updates all namespace_statistics and updates root storage statistics', :aggregate_failures do
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group1.id)
expect(Namespaces::ScheduleAggregationWorker).to receive(:perform_async).with(group2.id)
expect { perform }.to change(namespace_statistics, :count).from(1).to(2)
namespace_statistics.all.each do |stat|
expect(stat.dependency_proxy_size).to eq 20
expect(stat.storage_size).to eq 20
end
end
context 'when just a stat is passed' do
let(:statistics) { [:dependency_proxy_size] }
it 'calls the statistics update service with just that stat' do
expect(Groups::UpdateStatisticsService)
.to receive(:new)
.with(anything, statistics: [:dependency_proxy_size])
.twice.and_call_original
perform
end
end
context 'when a statistics update fails' do
before do
error_response = instance_double(ServiceResponse, message: 'an error', error?: true)
allow_next_instance_of(Groups::UpdateStatisticsService) do |instance|
allow(instance).to receive(:execute).and_return(error_response)
end
end
it 'logs an error' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:error).twice
end
perform
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe BackfillNamespaceStatisticsWithDependencyProxySize do
let_it_be(:groups) { table(:namespaces) }
let_it_be(:group1) { groups.create!(id: 10, name: 'test1', path: 'test1', type: 'Group') }
let_it_be(:group2) { groups.create!(id: 20, name: 'test2', path: 'test2', type: 'Group') }
let_it_be(:group3) { groups.create!(id: 30, name: 'test3', path: 'test3', type: 'Group') }
let_it_be(:group4) { groups.create!(id: 40, name: 'test4', path: 'test4', type: 'Group') }
let_it_be(:dependency_proxy_blobs) { table(:dependency_proxy_blobs) }
let_it_be(:dependency_proxy_manifests) { table(:dependency_proxy_manifests) }
let_it_be(:group1_manifest) { create_manifest(10, 10) }
let_it_be(:group2_manifest) { create_manifest(20, 20) }
let_it_be(:group3_manifest) { create_manifest(30, 30) }
let_it_be(:group1_blob) { create_blob(10, 10) }
let_it_be(:group2_blob) { create_blob(20, 20) }
let_it_be(:group3_blob) { create_blob(30, 30) }
describe '#up' do
it 'correctly schedules background migrations' do
stub_const("#{described_class}::BATCH_SIZE", 2)
Sidekiq::Testing.fake! do
freeze_time do
migrate!
aggregate_failures do
expect(described_class::MIGRATION)
.to be_scheduled_migration([10, 30], ['dependency_proxy_size'])
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, [20], ['dependency_proxy_size'])
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
end
def create_manifest(group_id, size)
dependency_proxy_manifests.create!(
group_id: group_id,
size: size,
file_name: 'test-file',
file: 'test',
digest: 'abc123'
)
end
def create_blob(group_id, size)
dependency_proxy_blobs.create!(
group_id: group_id,
size: size,
file_name: 'test-file',
file: 'test'
)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment