Commit 52cd0b7e authored by Adam Hegyi's avatar Adam Hegyi Committed by Tan Le

Improve performance of audit events CSV export

For the search UI, the audit events scope is limited by pagination. CSV
export does not have the same safeguard: its scope has no set limit
(we use the file size limit as the stopping mechanism instead).

Preloading associated records therefore needs to be performed in
batches to avoid overloading memory. The BatchLoader cache was designed to
hold data for one web request, which in our case might yield significant
amount of user records. To keep the memory growth under control, we're
clearing up the cached data after each iteration (i.e. after 1K
records).

A fix for the `MissingAttribute` error when calling `select` without
the `details` field is also introduced. The error was caused by the
callback that initializes `details` after the AR model is loaded from
the database. This change allows the following usage:
`AuditEvent.select(:id).first`.
Co-authored-by: default avatarTan Le <tle@gitlab.com>
parent dcda156e
......@@ -4,6 +4,7 @@ class AuditEvent < ApplicationRecord
include CreatedAtFilterable
include IgnorableColumns
include BulkInsertSafe
include EachBatch
PARALLEL_PERSISTENCE_COLUMNS = [
:author_name,
......@@ -54,7 +55,9 @@ class AuditEvent < ApplicationRecord
end
# Ensures `details` defaults to an empty hash once a record is loaded.
#
# Guarded by `has_attribute?` so partial selects such as
# `AuditEvent.select(:id).first` do not raise
# `ActiveModel::MissingAttributeError`.
def initialize_details
  return unless self.has_attribute?(:details)

  # A plain nil check is required here: `details&.nil?` can never be
  # truthy, because `nil&.nil?` short-circuits to nil (falsy), so the
  # default would never be applied.
  self.details = {} if details.nil?
end
def author_name
......
......@@ -20,7 +20,7 @@ module AuditEvents
# Builds the audit-events data source for CSV export.
#
# Wraps the finder result in a Preloader rather than eagerly preloading
# the whole relation: the CSV export scope is unbounded (stopped only by
# file size), so associated records must be preloaded batch by batch via
# Preloader#find_each to keep memory usage under control.
#
# @return [Gitlab::Audit::Events::Preloader]
def data
  events = AuditLogFinder.new(finder_params).execute
  Gitlab::Audit::Events::Preloader.new(events)
end
def finder_params
......
---
title: Improve audit events preloading logic for CSV export
merge_request: 44958
author:
type: performance
......@@ -109,7 +109,7 @@ module Audit
end
# Returns the first value of the details hash, coerced to a String.
#
# The String() coercion guards against non-string detail values
# (e.g. symbols or nil), which would otherwise leak into the CSV output
# or raise downstream.
#
# @return [String] empty string when the details hash is empty
def detail_value
  String(@details.each_value.first)
end
def oauth_label
......
......@@ -12,6 +12,25 @@ module Gitlab
end
end
end
# @param audit_events [ActiveRecord::Relation] relation of AuditEvent
#   records to iterate over in batches — presumably unbounded for CSV
#   export; see #find_each. (NOTE(review): assumed to respond to
#   `each_batch` — confirm against callers.)
def initialize(audit_events)
  @audit_events = audit_events
end
# Yields each audit event with its author and entity preloaded,
# processing the relation in batches so memory stays bounded on
# unbounded scopes (e.g. CSV export).
def find_each(&block)
  @audit_events.each_batch(column: :created_at) do |batch|
    # First pass registers the lazy author/entity lookups so BatchLoader
    # can resolve each association with a single batched query.
    batch.each do |event|
      event.lazy_author
      event.lazy_entity
    end

    # Second pass hands the (now preloadable) events to the caller.
    batch.each(&block)

    # BatchLoader's cache is designed to live for a whole web request;
    # clear it after every batch so it cannot grow with the export size.
    BatchLoader::Executor.clear_current
  end
end
end
end
end
......
......@@ -3,11 +3,10 @@
require 'spec_helper'
RSpec.describe Gitlab::Audit::Events::Preloader do
describe '.preload!' do
let_it_be(:audit_events) { create_list(:audit_event, 2) }
let(:audit_events_relation) { AuditEvent.where(id: audit_events.map(&:id)) }
let_it_be(:audit_events) { create_list(:audit_event, 2) }
let(:audit_events_relation) { AuditEvent.where(id: audit_events.map(&:id)) }
describe '.preload!' do
subject { described_class.preload!(audit_events_relation) }
it 'returns an ActiveRecord::Relation' do
......@@ -15,12 +14,6 @@ RSpec.describe Gitlab::Audit::Events::Preloader do
end
it 'preloads associated records' do
log = ActiveRecord::QueryRecorder.new do
subject.map do |event|
[event.author_name, event.lazy_entity.name]
end
end
# Expected queries when requesting for AuditEvent with associated records
#
# 1. On the audit_events table
......@@ -30,7 +23,39 @@ RSpec.describe Gitlab::Audit::Events::Preloader do
# 3. On the users table for entity name
# SELECT "users".* FROM "users" WHERE "users"."id" IN (2, 4)
#
expect(log.count).to eq(3)
expect do
subject.map do |event|
[event.author_name, event.lazy_entity.name]
end
end.not_to exceed_query_limit(3)
end
end
# Specs for Preloader#find_each: batched iteration with bounded query
# counts. (NOTE(review): indentation appears flattened by the diff
# scrape; original file is conventionally indented.)
describe '#find_each' do
let(:preloader) { described_class.new(audit_events_relation) }
# Every event in the relation must be yielded, in order.
it 'yields a list audit events' do
expect { |b| preloader.find_each(&b) }.to yield_successive_args(*audit_events)
end
# Query count stays constant per batch regardless of event count,
# because author/entity lookups are batched by BatchLoader.
it 'loads audit events in batches with preloaded associated records' do
# Expected queries when requesting for AuditEvent with associated records
#
# 1. Get the start of created_at value in 1k batch
# SELECT "audit_events"."created_at" FROM "audit_events" WHERE "audit_events"."id" IN (1, 2) ORDER BY "audit_events"."created_at" ASC LIMIT 1
# 2. Get the end of created_at value in 1k batch
# SELECT "audit_events"."created_at" FROM "audit_events" WHERE "audit_events"."id" IN (1, 2) AND "audit_events"."created_at" >= '2020-10-15 04:51:06.392709' ORDER BY "audit_events"."created_at" ASC LIMIT 1 OFFSET 1000
# 3. Get the audit_events in 1k batch
# SELECT "audit_events".* FROM "audit_events" WHERE "audit_events"."id" IN (1, 2) AND "audit_events"."created_at" >= '2020-10-15 04:51:06.392709'
# 4. On the users table for author_name
# SELECT "users"."id", "users"."name", "users"."username" FROM "users" WHERE "users"."id" IN (1, 3) ORDER BY "users"."id" ASC LIMIT 1000
# 5. On the users table for entity name
# SELECT "users".* FROM "users" WHERE "users"."id" IN (2, 4) ORDER BY "users"."id" ASC LIMIT 1000
expect do
preloader.find_each do |event|
[event.author_name, event.lazy_entity.name]
end
end.not_to exceed_query_limit(5)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment