Commit 32806ed2 authored by Vitali Tatarintev

Merge branch '335391-new-vsa-queries' into 'master'

Optionally use aggregated VSA backend

See merge request gitlab-org/gitlab!72978
parents c3178993 724c6378
......@@ -11,6 +11,12 @@ module Analytics
alias_attribute :state, :state_id
enum state: Issue.available_states, _suffix: true
# Restricts stage events to those whose issue has +user+ among its assignees.
# The match is expressed as a correlated EXISTS subquery on IssueAssignee,
# joined on issue_id.
scope :assigned_to, ->(user) do
  assignee_rows = IssueAssignee.where(user_id: user)
  same_issue = arel_table[:issue_id].eq(IssueAssignee.arel_table[:issue_id])

  where(assignee_rows.where(same_issue).arel.exists)
end
# Name of the column on this stage event model that holds the issue ID
# (the same column the assigned_to scope correlates on).
#
# @return [Symbol]
def self.issuable_id_column
:issue_id
end
......
......@@ -11,6 +11,12 @@ module Analytics
alias_attribute :state, :state_id
enum state: MergeRequest.available_states, _suffix: true
# Restricts stage events to those whose merge request has +user+ among its
# assignees. The match is expressed as a correlated EXISTS subquery on
# MergeRequestAssignee, joined on merge_request_id.
scope :assigned_to, ->(user) do
  assignee_rows = MergeRequestAssignee.where(user_id: user)
  same_merge_request = arel_table[:merge_request_id].eq(MergeRequestAssignee.arel_table[:merge_request_id])

  where(assignee_rows.where(same_merge_request).arel.exists)
end
# Name of the column on this stage event model that holds the merge request ID
# (the same column the assigned_to scope correlates on).
#
# @return [Symbol]
def self.issuable_id_column
:merge_request_id
end
......
......@@ -5,6 +5,19 @@ module Analytics
module StageEventModel
extend ActiveSupport::Concern
included do
  # Equality filters on plain columns.
  scope :by_stage_event_hash_id, ->(hash_id) { where(stage_event_hash_id: hash_id) }
  scope :by_project_id, ->(project_id) { where(project_id: project_id) }
  scope :by_group_id, ->(group_id) { where(group_id: group_id) }
  scope :authored, ->(author) { where(author_id: author) }
  scope :with_milestone_id, ->(id) { where(milestone_id: id) }

  # Inclusive bounds (>= / <=) on the end event timestamp.
  scope :end_event_timestamp_after, ->(time) { where(arel_table[:end_event_timestamp].gteq(time)) }
  scope :end_event_timestamp_before, ->(time) { where(arel_table[:end_event_timestamp].lteq(time)) }

  # Inclusive bounds (>= / <=) on the start event timestamp.
  scope :start_event_timestamp_after, ->(time) { where(arel_table[:start_event_timestamp].gteq(time)) }
  scope :start_event_timestamp_before, ->(time) { where(arel_table[:start_event_timestamp].lteq(time)) }

  # Records whose end event timestamp is still NULL.
  scope :end_event_is_not_happened_yet, -> { where(end_event_timestamp: nil) }
end
class_methods do
def upsert_data(data)
upsert_values = data.map do |row|
......
---
# Development feature flag, off by default. When enabled for a group or
# project, value stream analytics request params carry
# `use_aggregated_data_collector: true`, which switches median/count
# calculation to the aggregated data collector.
name: use_vsa_aggregated_tables
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72978
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/343429
milestone: '14.5'
type: development
group: group::optimize
default_enabled: false
# frozen_string_literal: true
# EE additions to the aggregated base query builder: stages that belong to a
# Group are scoped to the whole group hierarchy and can additionally be
# narrowed down with the `project_ids` param.
module EE::Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder
  extend ::Gitlab::Utils::Override

  # Builds the base query and then applies the optional project filter.
  override :build
  def build
    filter_by_project_ids(super)
  end

  private

  # Group stages are matched on group_id across the group and its descendants;
  # any other parent falls back to the base implementation.
  override :filter_by_stage_parent
  def filter_by_stage_parent(query)
    if stage.parent.instance_of?(Group)
      query.by_group_id(stage.parent.self_and_descendant_ids)
    else
      super
    end
  end

  # Restricts a group-level query to the requested projects, considering only
  # projects that live inside the group hierarchy. When none of the requested
  # projects belong to the hierarchy the query is returned unfiltered.
  def filter_by_project_ids(query)
    return query unless stage.parent.instance_of?(Group) && params[:project_ids].present?

    projects_in_hierarchy = Project
      .id_in(params[:project_ids])
      .in_namespace(stage.parent.self_and_descendant_ids)
      .select(:id)

    projects_in_hierarchy.empty? ? query : query.by_project_id(projects_in_hierarchy)
  end
end
......@@ -81,7 +81,8 @@ module EE
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestLastBuildFinished,
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestLastEdited,
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestLabelAdded,
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestLabelRemoved
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestLabelRemoved,
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestMerged
],
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestCreated => [
::Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestClosed,
......
......@@ -258,6 +258,7 @@ RSpec.describe 'Group value stream analytics filters and data', :js do
end
before do
stub_feature_flags(use_vsa_aggregated_tables: false)
issue.update!(created_at: 5.days.ago)
create_cycle(user, project, issue, mr, milestone, pipeline)
create(:labeled_issue, created_at: 5.days.ago, project: create(:project, group: group), labels: [group_label1])
......
# frozen_string_literal: true
require 'spec_helper'
# Verifies the EE behaviour of the aggregated query builder for group stages:
# records are looked up across the group hierarchy and can be narrowed with
# the project_ids param.
RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder do
  let_it_be(:group) { create(:group) }
  let_it_be(:sub_group) { create(:group, parent: group) }
  let_it_be(:project_1) { create(:project, group: sub_group) }
  let_it_be(:project_2) { create(:project, group: sub_group) }

  let_it_be(:other_group) { create(:group) }
  let_it_be(:other_project) { create(:project, group: other_group) }

  let_it_be(:stage) do
    create(:cycle_analytics_group_stage,
      group: group,
      start_event_identifier: :issue_created,
      end_event_identifier: :issue_deployed_to_production
    )
  end

  let_it_be(:stage_event_1) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: sub_group.id,
      project_id: project_1.id,
      issue_id: 1
    )
  end

  let_it_be(:stage_event_2) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: sub_group.id,
      project_id: project_2.id,
      issue_id: 2
    )
  end

  # Same stage hash, but recorded outside of the group hierarchy.
  let_it_be(:stage_event_3) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: other_group.id,
      project_id: other_project.id,
      issue_id: 3
    )
  end

  let(:params) do
    {
      from: 1.year.ago.to_date,
      to: Date.today
    }
  end

  subject(:issue_ids) { described_class.new(stage: stage, params: params).build.pluck(:issue_id) }

  it 'looks up items within the group hierarchy' do
    # The builder applies no explicit ordering, so compare without relying on row order.
    expect(issue_ids).to match_array([stage_event_1.issue_id, stage_event_2.issue_id])
    # `include` takes the element itself; wrapping it in an array would look
    # for a nested array and make this assertion pass unconditionally.
    expect(issue_ids).not_to include(stage_event_3.issue_id)
  end

  it 'accepts project_ids filter' do
    # other_project is outside of the group hierarchy and must be ignored.
    params[:project_ids] = [project_1.id, other_project.id]

    expect(issue_ids).to eq([stage_event_1.issue_id])
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      module Aggregated
        # Builds the relation over the aggregated stage event records of a
        # value stream stage, applying the time range and the optional
        # author / milestone / label / assignee filters.
        # rubocop: disable CodeReuse/ActiveRecord
        class BaseQueryBuilder
          include StageQueryHelpers

          # Maps the stage's subject class name to the stage event model to query.
          MODEL_CLASSES = {
            MergeRequest.to_s => ::Analytics::CycleAnalytics::MergeRequestStageEvent,
            Issue.to_s => ::Analytics::CycleAnalytics::IssueStageEvent
          }.freeze

          # Allowed params:
          # * from - lower bound of the time filter (defaults to 30 days ago)
          # * to - upper bound of the time filter (optional)
          # * end_event_filter - :in_progress applies the time filter to the
          #   start event of still-open items instead of the end event
          # * author_username
          # * milestone_title
          # * label_name (array)
          # * assignee_username (array)
          # * project_ids (array)
          #
          # Raises KeyError when the stage's subject class has no stage event model.
          def initialize(stage:, params: {})
            @stage = stage
            @params = params
            @root_ancestor = stage.parent.root_ancestor
            @stage_event_model = MODEL_CLASSES.fetch(stage.subject_class.to_s)
          end

          # Returns the fully filtered relation. Each step receives the relation
          # produced by the previous one.
          def build
            base_query
              .then { |scope| filter_by_stage_parent(scope) }
              .then { |scope| filter_author(scope) }
              .then { |scope| filter_milestone_ids(scope) }
              .then { |scope| filter_label_names(scope) }
              .then { |scope| filter_assignees(scope) }
          end

          # Keeps records authored by the given username; an unknown username
          # yields an empty relation.
          def filter_author(query)
            username = params[:author_username]
            return query if username.blank?

            author = User.by_username(username).first

            author.present? ? query.authored(author) : query.none
          end

          # Keeps records attached to the first milestone with the given title
          # found under the root ancestor; an unknown title yields an empty relation.
          def filter_milestone_ids(query)
            title = params[:milestone_title]
            return query if title.blank?

            finder = MilestonesFinder.new(
              group_ids: root_ancestor.self_and_descendant_ids,
              project_ids: root_ancestor.all_projects.select(:id),
              title: title
            )
            milestone = finder.execute.first

            milestone.blank? ? query.none : query.with_milestone_id(milestone.id)
          end

          # Keeps records whose issuable carries every requested label. One
          # EXISTS condition on LabelLink is added per requested label name.
          def filter_label_names(query)
            label_names = params[:label_name]
            return query if label_names.blank?

            label_id_groups = Issuables::LabelFilter
              .new(group: root_ancestor, project: nil, params: { label_name: label_names })
              .find_label_ids(label_names)

            # A size mismatch means at least one name did not resolve to labels.
            return query.none unless label_names.size == label_id_groups.size

            scoped = query

            label_id_groups.each do |label_ids|
              issuable_id = scoped.model.arel_table[scoped.model.issuable_id_column]

              label_link = LabelLink
                .where(target_type: stage.subject_class.name)
                .where(LabelLink.arel_table['target_id'].eq(issuable_id))
                .where(label_id: label_ids)

              scoped = scoped.where(label_link.arel.exists)
            end

            scoped
          end

          # Delegates assignee filtering to Issuables::AssigneeFilter.
          def filter_assignees(query)
            usernames = params[:assignee_username]
            return query if usernames.blank?

            Issuables::AssigneeFilter.new(params: { assignee_username: usernames }).filter(query)
          end

          # Scopes the records to the stage's parent by project ID.
          def filter_by_stage_parent(query)
            query.by_project_id(stage.parent_id)
          end

          # Records of this stage within the requested time range. For
          # in-progress items the range applies to the start event of open
          # records without an end event; otherwise it applies to the end event.
          def base_query
            lower_bound = params[:from] || 30.days.ago
            upper_bound = params[:to]
            events = stage_event_model.by_stage_event_hash_id(stage.stage_event_hash_id)

            if in_progress?
              events = events
                .end_event_is_not_happened_yet
                .opened_state
                .start_event_timestamp_after(lower_bound)

              upper_bound ? events.start_event_timestamp_before(upper_bound) : events
            else
              events = events.end_event_timestamp_after(lower_bound)

              upper_bound ? events.end_event_timestamp_before(upper_bound) : events
            end
          end

          private

          attr_reader :stage, :params, :root_ancestor, :stage_event_model
        end
        # rubocop: enable CodeReuse/ActiveRecord
      end
    end
  end
end
Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder.prepend_mod_with('Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder')
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      module Aggregated
        # Collects stage metrics (median duration, item count) from the
        # aggregated stage event records.
        #
        # Arguments:
        #   stage - an instance of CycleAnalytics::ProjectStage or CycleAnalytics::GroupStage
        #   params:
        #     current_user: an instance of User
        #     from: DateTime
        #     to: DateTime
        class DataCollector
          include Gitlab::Utils::StrongMemoize

          # Upper limit applied to the count query.
          MAX_COUNT = 10_001

          # NOTE(review): no `records_fetcher` method is defined in this class
          # as visible here - confirm it is provided before relying on
          # `serialized_records`.
          delegate :serialized_records, to: :records_fetcher

          def initialize(stage:, params: {})
            @stage = stage
            @params = params
          end

          # Memoized Median calculator built on the filtered stage query.
          def median
            strong_memoize(:median) do
              Median.new(stage: stage, query: query, params: params)
            end
          end

          # Memoized number of matching records, capped at MAX_COUNT.
          def count
            strong_memoize(:count) do
              limit_count
            end
          end

          private

          attr_reader :stage, :params

          # A fresh relation is built on every call.
          def query
            builder = BaseQueryBuilder.new(stage: stage, params: params)
            builder.build
          end

          def limit_count
            capped = query.limit(MAX_COUNT)
            capped.count
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      module Aggregated
        # Calculates the median duration of a stage from the given stage event
        # relation.
        class Median
          include StageQueryHelpers

          # stage  - the stage the records belong to
          # query  - relation over the stage event records to aggregate
          # params - request params (read by StageQueryHelpers#in_progress?)
          def initialize(stage:, query:, params:)
            @stage = stage
            @query = query
            @params = params
          end

          # Median duration in seconds, or nil when the database returns no value.
          #
          # The projection is applied to a local relation: @query is left
          # untouched so repeated calls do not stack SELECT clauses.
          # rubocop: disable CodeReuse/ActiveRecord
          def seconds
            median_query = query.select(median_duration_in_seconds.as('median')).reorder(nil)
            row = median_query.take

            row && row['median']
          end
          # rubocop: enable CodeReuse/ActiveRecord

          # Median duration in days, or nil when there is no median.
          def days
            # Read the value once; every call to #seconds executes the query.
            median_in_seconds = seconds

            median_in_seconds ? median_in_seconds.fdiv(1.day) : nil
          end

          private

          attr_reader :stage, :query, :params
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      module Aggregated
        # Arel building blocks for duration and median calculations over stage
        # event records. The including object must respond to `query` (a
        # relation exposing `model`) and `params`.
        module StageQueryHelpers
          # percentile_cont(0.5) WITHIN GROUP (ORDER BY <duration>)
          def percentile_cont
            ordering = Arel::Nodes::UnaryOperation.new(Arel::Nodes::SqlLiteral.new('ORDER BY'), duration)

            Arel::Nodes::NamedFunction.new('percentile_cont(0.5) WITHIN GROUP', [ordering])
          end

          # Duration expression of a record: end minus start timestamp. For
          # in-progress items the current time stands in for the end timestamp.
          def duration
            events = query.model.arel_table

            finished_at =
              if in_progress?
                Arel::Nodes::NamedFunction.new('TO_TIMESTAMP', [Time.current.to_i])
              else
                events[:end_event_timestamp]
              end

            Arel::Nodes::Subtraction.new(finished_at, events[:start_event_timestamp])
          end

          # The median duration extracted as epoch seconds.
          def median_duration_in_seconds
            Arel::Nodes::Extract.new(percentile_cont, :epoch)
          end

          # True when the request asks for items that have not finished the stage.
          def in_progress?
            params[:end_event_filter] == :in_progress
          end
        end
      end
    end
  end
end
......@@ -29,7 +29,11 @@ module Gitlab
# Memoized median for the stage. The aggregated data collector is used when
# requested through the params; otherwise the Median calculator is built on
# the regular query. Only the selected backend is instantiated.
def median
  strong_memoize(:median) do
    if use_aggregated_data_collector?
      aggregated_data_collector.median
    else
      Median.new(stage: stage, query: query, params: params)
    end
  end
end
......@@ -41,7 +45,11 @@ module Gitlab
# Memoized record count for the stage. The aggregated data collector is used
# when requested through the params; otherwise the limited count query runs.
# Only the selected backend is queried.
def count
  strong_memoize(:count) do
    if use_aggregated_data_collector?
      aggregated_data_collector.count
    else
      limit_count
    end
  end
end
......@@ -59,6 +67,14 @@ module Gitlab
# Counts the matching records, with the query limited to MAX_COUNT rows.
def limit_count
query.limit(MAX_COUNT).count
end
# Lazily builds (and caches in an instance variable) the aggregated data
# collector for the same stage and params.
def aggregated_data_collector
@aggregated_data_collector ||= Aggregated::DataCollector.new(stage: stage, params: params)
end
# Whether the params ask for the aggregated backend. Returns the stored value
# of :use_aggregated_data_collector, or false when the key is absent.
def use_aggregated_data_collector?
  params.fetch(:use_aggregated_data_collector) { false }
end
end
end
end
......
......@@ -79,7 +79,8 @@ module Gitlab
sort: sort&.to_sym,
direction: direction&.to_sym,
page: page,
end_event_filter: end_event_filter.to_sym
end_event_filter: end_event_filter.to_sym,
use_aggregated_data_collector: Feature.enabled?(:use_vsa_aggregated_tables, group || project, default_enabled: :yaml)
}.merge(attributes.symbolize_keys.slice(*FINDER_PARAM_NAMES))
end
......
......@@ -16,6 +16,7 @@ RSpec.describe Projects::Analytics::CycleAnalytics::StagesController do
end
before do
# Keep the aggregated VSA backend switched off for these examples.
# NOTE(review): presumably so they keep exercising the non-aggregated data
# collector - confirm against the request params builder.
stub_feature_flags(use_vsa_aggregated_tables: false)
sign_in(user)
end
......
# frozen_string_literal: true
# Factory for Analytics::CycleAnalytics::IssueStageEvent records.
FactoryBot.define do
factory :cycle_analytics_issue_stage_event, class: 'Analytics::CycleAnalytics::IssueStageEvent' do
# Only the stage event hash ID uses the sequence counter.
sequence(:stage_event_hash_id) { |n| n }
# These blocks ignore the sequence counter, so the value is always 0 unless
# the caller overrides the attribute.
sequence(:issue_id) { 0 }
sequence(:group_id) { 0 }
sequence(:project_id) { 0 }
# By default the stage started three weeks ago and ended two weeks ago.
start_event_timestamp { 3.weeks.ago.to_date }
end_event_timestamp { 2.weeks.ago.to_date }
end
end
# frozen_string_literal: true
# Factory for Analytics::CycleAnalytics::MergeRequestStageEvent records.
FactoryBot.define do
factory :cycle_analytics_merge_request_stage_event, class: 'Analytics::CycleAnalytics::MergeRequestStageEvent' do
# Only the stage event hash ID uses the sequence counter.
sequence(:stage_event_hash_id) { |n| n }
# These blocks ignore the sequence counter, so the value is always 0 unless
# the caller overrides the attribute.
sequence(:merge_request_id) { 0 }
sequence(:group_id) { 0 }
sequence(:project_id) { 0 }
# By default the stage started three weeks ago and ended two weeks ago.
start_event_timestamp { 3.weeks.ago.to_date }
end_event_timestamp { 2.weeks.ago.to_date }
end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies project-level scoping and the individual filters (author,
# milestone, labels, assignee, time range) of the aggregated query builder.
RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder do
  let_it_be(:group) { create(:group) }
  let_it_be(:project) { create(:project, group: group) }
  let_it_be(:milestone) { create(:milestone, project: project) }
  let_it_be(:user_1) { create(:user) }

  let_it_be(:label_1) { create(:label, project: project) }
  let_it_be(:label_2) { create(:label, project: project) }

  let_it_be(:issue_1) { create(:issue, project: project, author: project.creator, labels: [label_1, label_2]) }
  let_it_be(:issue_2) { create(:issue, project: project, milestone: milestone, assignees: [user_1]) }
  let_it_be(:issue_3) { create(:issue, project: project) }
  let_it_be(:issue_outside_project) { create(:issue) }

  let_it_be(:stage) do
    create(:cycle_analytics_project_stage,
      project: project,
      start_event_identifier: :issue_created,
      end_event_identifier: :issue_deployed_to_production
    )
  end

  # Finished 8 months ago; authored by the project creator, carries both labels.
  let_it_be(:stage_event_1) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: group.id,
      project_id: project.id,
      issue_id: issue_1.id,
      author_id: project.creator.id,
      milestone_id: nil,
      state_id: issue_1.state_id,
      end_event_timestamp: 8.months.ago
    )
  end

  # Finished with the factory's default timestamps; has a milestone and an assignee.
  let_it_be(:stage_event_2) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: group.id,
      project_id: project.id,
      issue_id: issue_2.id,
      author_id: nil,
      milestone_id: milestone.id,
      state_id: issue_2.state_id
    )
  end

  # Still in progress: started 8 months ago, no end event yet.
  let_it_be(:stage_event_3) do
    create(:cycle_analytics_issue_stage_event,
      stage_event_hash_id: stage.stage_event_hash_id,
      group_id: group.id,
      project_id: project.id,
      issue_id: issue_3.id,
      author_id: nil,
      milestone_id: milestone.id,
      state_id: issue_3.state_id,
      start_event_timestamp: 8.months.ago,
      end_event_timestamp: nil
    )
  end

  let(:params) do
    {
      from: 1.year.ago.to_date,
      to: Date.today
    }
  end

  subject(:issue_ids) { described_class.new(stage: stage, params: params).build.pluck(:issue_id) }

  it 'scopes the query for the given project' do
    expect(issue_ids).to match_array([issue_1.id, issue_2.id])
    # `include` takes the element itself; wrapping it in an array would look
    # for a nested array and make this assertion pass unconditionally.
    expect(issue_ids).not_to include(issue_outside_project.id)
  end

  describe 'author_username param' do
    it 'returns stage events associated with the given author' do
      params[:author_username] = project.creator.username

      expect(issue_ids).to eq([issue_1.id])
    end

    it 'returns empty result when unknown author is given' do
      params[:author_username] = 'no one'

      expect(issue_ids).to be_empty
    end
  end

  describe 'milestone_title param' do
    it 'returns stage events associated with the milestone' do
      params[:milestone_title] = milestone.title

      expect(issue_ids).to eq([issue_2.id])
    end

    it 'returns empty result when unknown milestone is given' do
      params[:milestone_title] = 'unknown milestone'

      expect(issue_ids).to be_empty
    end
  end

  describe 'label_name param' do
    it 'returns stage events associated with multiple labels' do
      params[:label_name] = [label_1.name, label_2.name]

      expect(issue_ids).to eq([issue_1.id])
    end

    it 'does not include records with partial label match' do
      params[:label_name] = [label_1.name, 'other label']

      expect(issue_ids).to be_empty
    end
  end

  describe 'assignee_username param' do
    it 'returns stage events associated assignee' do
      params[:assignee_username] = [user_1.username]

      expect(issue_ids).to eq([issue_2.id])
    end
  end

  describe 'timestamp filtering' do
    before do
      params[:from] = 1.year.ago
      params[:to] = 6.months.ago
    end

    it 'filters by the end event time range' do
      expect(issue_ids).to eq([issue_1.id])
    end

    context 'when in_progress items are requested' do
      before do
        params[:end_event_filter] = :in_progress
      end

      it 'filters by the start event time range' do
        expect(issue_ids).to eq([issue_3.id])
      end
    end
  end
end
......@@ -13,5 +13,8 @@ RSpec.describe Analytics::CycleAnalytics::IssueStageEvent do
expect(described_class.states).to eq(Issue.available_states)
end
# The shared examples build their records through these factories, so they
# are only included together with the factory definitions.
it_behaves_like 'StageEventModel' do
  let_it_be(:stage_event_factory) { :cycle_analytics_issue_stage_event }
  let_it_be(:issuable_factory) { :issue }
end
end
......@@ -13,5 +13,8 @@ RSpec.describe Analytics::CycleAnalytics::MergeRequestStageEvent do
expect(described_class.states).to eq(MergeRequest.available_states)
end
# The shared examples build their records through these factories, so they
# are only included together with the factory definitions.
it_behaves_like 'StageEventModel' do
  let_it_be(:stage_event_factory) { :cycle_analytics_merge_request_stage_event }
  let_it_be(:issuable_factory) { :merge_request }
end
end
......@@ -74,4 +74,108 @@ RSpec.shared_examples 'StageEventModel' do
expect(input_data.map(&:values).sort).to eq(output_data)
end
end
# Exercises the query scopes of the including stage event model. The host
# spec must define `stage_event_factory` and `issuable_factory`.
describe 'scopes' do
  # Attribute hashes of the given records, for comparing records by value.
  def attributes(array)
    array.map(&:attributes)
  end

  # Passes when both collections hold records with identical attributes, in
  # the same order.
  RSpec::Matchers.define :match_attributes do |expected|
    match do |actual|
      actual.map(&:attributes) == expected.map(&:attributes)
    end
  end

  let_it_be(:user) { create(:user) }
  # A real project: its id and namespace_id feed the project/group scopes below.
  let_it_be(:project) { create(:project) }
  let_it_be(:milestone) { create(:milestone) }
  let_it_be(:issuable_with_assignee) { create(issuable_factory, assignees: [user]) }

  let_it_be(:record) { create(stage_event_factory, start_event_timestamp: 3.years.ago.to_date, end_event_timestamp: 2.years.ago.to_date) }
  let_it_be(:record_with_author) { create(stage_event_factory, author_id: user.id) }
  let_it_be(:record_with_project) { create(stage_event_factory, project_id: project.id) }
  let_it_be(:record_with_group) { create(stage_event_factory, group_id: project.namespace_id) }
  let_it_be(:record_with_assigned_issuable) { create(stage_event_factory, described_class.issuable_id_column => issuable_with_assignee.id) }
  let_it_be(:record_with_milestone) { create(stage_event_factory, milestone_id: milestone.id) }

  it 'filters by stage_event_hash_id' do
    records = described_class.by_stage_event_hash_id(record.stage_event_hash_id)

    expect(records).to match_attributes([record])
  end

  it 'filters by project_id' do
    records = described_class.by_project_id(project.id)

    expect(records).to match_attributes([record_with_project])
  end

  it 'filters by group_id' do
    records = described_class.by_group_id(project.namespace_id)

    expect(records).to match_attributes([record_with_group])
  end

  it 'filters by author_id' do
    records = described_class.authored(user)

    expect(records).to match_attributes([record_with_author])
  end

  it 'filters by assignee' do
    records = described_class.assigned_to(user)

    expect(records).to match_attributes([record_with_assigned_issuable])
  end

  it 'filters by milestone_id' do
    records = described_class.with_milestone_id(milestone.id)

    expect(records).to match_attributes([record_with_milestone])
  end

  describe 'start_event_timestamp filtering' do
    it 'when range is given' do
      records = described_class
        .start_event_timestamp_after(4.years.ago)
        .start_event_timestamp_before(2.years.ago)

      expect(records).to match_attributes([record])
    end

    it 'when specifying upper bound' do
      records = described_class.start_event_timestamp_before(2.years.ago)

      expect(attributes(records)).to include(attributes([record]).first)
    end

    it 'when specifying the lower bound' do
      records = described_class.start_event_timestamp_after(4.years.ago)

      expect(attributes(records)).to include(attributes([record]).first)
    end
  end

  describe 'end_event_timestamp filtering' do
    it 'when range is given' do
      records = described_class
        .end_event_timestamp_after(3.years.ago)
        .end_event_timestamp_before(1.year.ago)

      expect(records).to match_attributes([record])
    end

    it 'when specifying upper bound' do
      records = described_class.end_event_timestamp_before(1.year.ago)

      expect(attributes(records)).to include(attributes([record]).first)
    end

    it 'when specifying the lower bound' do
      records = described_class.end_event_timestamp_after(3.years.ago)

      expect(attributes(records)).to include(attributes([record]).first)
    end
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment