Commit 578ca465, authored by Adam Hegyi and committed by Stan Hu

Implement new query backend for Cycle Analytics

- Providing median based on durations defined by the stage
- Providing list of serialized objects by stage
- Introduce DurationFilter class that decides whether we need
additional filtering to avoid negative durations
parent 9b2b965f
......@@ -9,6 +9,7 @@ module Analytics
belongs_to :project
alias_attribute :parent, :project
alias_attribute :parent_id, :project_id
end
end
end
......@@ -47,11 +47,17 @@ module Analytics
!custom
end
# The model that is going to be queried, Issue or MergeRequest
def subject_model
# The model class that is going to be queried, Issue or MergeRequest
def subject_class
start_event.object_type
end
# Tells whether this stage is a default (non-custom) stage whose start and end
# event identifiers equal the ones given in +stage_params+.
#
# stage_params - a Hash-like object responding to #[] with the keys
#                :start_event_identifier and :end_event_identifier.
#
# Identifiers are compared by their string form, so a Symbol on one side
# matches the equivalent String on the other.
def matches_with_stage_params?(stage_params)
  return false unless default_stage?

  expected_start = stage_params[:start_event_identifier].to_s
  expected_end = stage_params[:end_event_identifier].to_s

  start_event_identifier.to_s == expected_start &&
    end_event_identifier.to_s == expected_end
end
private
def validate_stage_event_pairs
......
# frozen_string_literal: true
# Adds (and on rollback removes) a composite index on issue_metrics covering
# issue_id together with the three "first ... at" timestamp columns.
class IndexTimestampColumnsForIssueMetrics < ActiveRecord::Migration[5.2]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # The concurrent index helpers are used below, so the migration must not be
  # wrapped in a DDL transaction.
  disable_ddl_transaction!

  def up
    add_concurrent_index(:issue_metrics, indexed_columns, index_options)
  end

  def down
    remove_concurrent_index(:issue_metrics, indexed_columns, index_options)
  end

  private

  # Columns of the composite index, in index order.
  def indexed_columns
    %i[issue_id first_mentioned_in_commit_at first_associated_with_milestone_at first_added_to_board_at]
  end

  # Explicit index name shared by `up` and `down`.
  def index_options
    { name: 'index_issue_metrics_on_issue_id_and_timestamps' }
  end
end
# frozen_string_literal: true
# Adds (and on rollback removes) a composite index on
# merge_requests(target_project_id, created_at).
class IndexTimestampColumnsForMergeRequestsCreationDate < ActiveRecord::Migration[5.2]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # The concurrent index helpers are used below, so the migration must not be
  # wrapped in a DDL transaction.
  disable_ddl_transaction!

  def up
    add_concurrent_index(:merge_requests, indexed_columns, index_options)
  end

  def down
    remove_concurrent_index(:merge_requests, indexed_columns, index_options)
  end

  private

  # Columns of the composite index, in index order.
  def indexed_columns
    %i[target_project_id created_at]
  end

  # Explicit index name shared by `up` and `down`.
  def index_options
    { name: 'index_merge_requests_target_project_id_created_at' }
  end
end
......@@ -1816,6 +1816,7 @@ ActiveRecord::Schema.define(version: 2019_10_04_134055) do
t.datetime "first_added_to_board_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["issue_id", "first_mentioned_in_commit_at", "first_associated_with_milestone_at", "first_added_to_board_at"], name: "index_issue_metrics_on_issue_id_and_timestamps"
t.index ["issue_id"], name: "index_issue_metrics"
end
......@@ -2226,6 +2227,7 @@ ActiveRecord::Schema.define(version: 2019_10_04_134055) do
t.index ["source_project_id", "source_branch"], name: "index_merge_requests_on_source_project_id_and_source_branch"
t.index ["state", "merge_status"], name: "index_merge_requests_on_state_and_merge_status", where: "(((state)::text = 'opened'::text) AND ((merge_status)::text = 'can_be_merged'::text))"
t.index ["target_branch"], name: "index_merge_requests_on_target_branch"
t.index ["target_project_id", "created_at"], name: "index_merge_requests_target_project_id_created_at"
t.index ["target_project_id", "iid"], name: "index_merge_requests_on_target_project_id_and_iid", unique: true
t.index ["target_project_id", "iid"], name: "index_merge_requests_on_target_project_id_and_iid_opened", where: "((state)::text = 'opened'::text)"
t.index ["target_project_id", "merge_commit_sha", "id"], name: "index_merge_requests_on_tp_id_and_merge_commit_sha_and_id"
......
......@@ -9,6 +9,7 @@ module Analytics
belongs_to :group
alias_attribute :parent, :group
alias_attribute :parent_id, :group_id
end
end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      # Builds the base relation for a Cycle Analytics stage.
      #
      # The relation starts from the stage's subject class (Issue or
      # MergeRequest), is scoped to the stage's parent and to a creation-date
      # window, is customized by the stage's start and end events, and finally
      # keeps only rows whose end timestamp is not earlier than the start
      # timestamp (so durations are never negative).
      #
      # Arguments:
      #   stage  - the stage object; must respond to #subject_class, #parent,
      #            #parent_id, #start_event and #end_event
      #   params - Hash, optional keys:
      #     from: lower bound for `created_at` (defaults to 30 days ago)
      #     to:   upper bound for `created_at` (no upper bound when absent)
      class BaseQueryBuilder
        include Gitlab::CycleAnalytics::MetricsTables

        delegate :subject_class, to: :stage

        # rubocop: disable CodeReuse/ActiveRecord

        def initialize(stage:, params: {})
          @stage = stage
          @params = params
        end

        # Returns the fully scoped relation for the stage.
        #
        # Raises ArgumentError when the stage's parent or subject class is not
        # supported by this builder.
        def build
          query = subject_class
          query = filter_by_parent_model(query)
          query = filter_by_time_range(query)
          query = stage.start_event.apply_query_customization(query)
          query = stage.end_event.apply_query_customization(query)
          query.where(duration_condition)
        end

        private

        attr_reader :stage, :params

        # Arel condition: end event timestamp >= start event timestamp.
        def duration_condition
          stage.end_event.timestamp_projection.gteq(stage.start_event.timestamp_projection)
        end

        # Scopes the query to the stage's parent. Only Project parents are
        # handled here; issues are matched on `project_id`, merge requests on
        # `target_project_id`.
        def filter_by_parent_model(query)
          raise ArgumentError, "unknown parent_class: #{parent_class}" unless parent_class.eql?(Project)

          if subject_class.eql?(Issue)
            query.where(project_id: stage.parent_id)
          elsif subject_class.eql?(MergeRequest)
            query.where(target_project_id: stage.parent_id)
          else
            raise ArgumentError, "unknown subject_class: #{subject_class}"
          end
        end

        # Restricts the query to subjects created inside the requested window.
        def filter_by_time_range(query)
          # `||` instead of `fetch(:from, default)`: an explicitly passed
          # `from: nil` must also fall back to the default, otherwise the
          # condition becomes `created_at >= NULL` and silently matches nothing.
          from = params[:from] || 30.days.ago
          to = params[:to]

          query = query.where(subject_table[:created_at].gteq(from))
          query = query.where(subject_table[:created_at].lteq(to)) if to
          query
        end

        def subject_table
          subject_class.arel_table
        end

        def parent_class
          stage.parent.class
        end

        # rubocop: enable CodeReuse/ActiveRecord
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      # Entry point for stage data: hands out a memoized Median calculator and
      # a memoized RecordsFetcher, each built on a query produced by
      # BaseQueryBuilder.
      #
      # Arguments:
      #   stage - an instance of CycleAnalytics::ProjectStage or CycleAnalytics::GroupStage
      #   params:
      #     current_user: an instance of User
      #     from: DateTime
      #     to: DateTime
      class DataCollector
        include Gitlab::Utils::StrongMemoize

        def initialize(stage:, params: {})
          @stage = stage
          @params = params
        end

        # Memoized fetcher for the stage's serialized records.
        def records_fetcher
          strong_memoize(:records_fetcher) { RecordsFetcher.new(stage: stage, query: query, params: params) }
        end

        # Memoized median calculator for the stage.
        def median
          strong_memoize(:median) { Median.new(stage: stage, query: query) }
        end

        private

        attr_reader :stage, :params

        # Builds a fresh base query on every call.
        def query
          builder = BaseQueryBuilder.new(stage: stage, params: params)
          builder.build
        end
      end
    end
  end
end
......@@ -92,8 +92,8 @@ module Gitlab
name: 'production',
custom: false,
relative_position: 7,
start_event_identifier: :merge_request_merged,
end_event_identifier: :merge_request_first_deployed_to_production
start_event_identifier: :issue_created,
end_event_identifier: :production_stage_end
}
end
end
......
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      # Computes the median duration of a stage from a prepared base query.
      #
      # Arguments:
      #   stage - the stage whose start/end events define the duration
      #   query - the base relation the median is selected from
      class Median
        include StageQueryHelpers

        def initialize(stage:, query:)
          @stage = stage
          @query = query
        end

        # Returns the median duration as an Integer (via #to_i), or nil when
        # the query yields no median value.
        #
        # The projection is applied to a local relation instead of being
        # written back to @query, so calling this method repeatedly does not
        # keep appending `median` columns to the stored query.
        def seconds
          median_query = query.select(median_duration_in_seconds.as('median'))
          row = execute_query(median_query).first || {}
          median = row['median']

          median.to_i if median
        end

        private

        # `stage` is also read by StageQueryHelpers#duration.
        attr_reader :stage, :query

        # Arel node for the ordered-set aggregate
        # `percentile_cont(0.5) WITHIN GROUP (ORDER BY <duration>)`.
        def percentile_cont
          percentile_cont_ordering = Arel::Nodes::UnaryOperation.new(Arel::Nodes::SqlLiteral.new('ORDER BY'), duration)

          Arel::Nodes::NamedFunction.new(
            'percentile_cont(0.5) WITHIN GROUP',
            [percentile_cont_ordering]
          )
        end

        # EXTRACT(EPOCH FROM <median interval>), i.e. the median in seconds.
        def median_duration_in_seconds
          Arel::Nodes::Extract.new(percentile_cont, :epoch)
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      # Loads up to MAX_RECORDS records for a stage, ordered by the end event
      # timestamp (newest first), and serializes them.
      #
      # Arguments:
      #   stage  - the stage being inspected
      #   query  - the base relation for the stage
      #   params - Hash; :current_user is required when subject records are
      #            loaded through a finder
      class RecordsFetcher
        include Gitlab::Utils::StrongMemoize
        include StageQueryHelpers
        include Gitlab::CycleAnalytics::MetricsTables

        MAX_RECORDS = 20

        # Per subject class: the finder used to load records, the serializer,
        # the associations to preload and the columns to select.
        MAPPINGS = {
          Issue => {
            finder_class: IssuesFinder,
            serializer_class: AnalyticsIssueSerializer,
            includes_for_query: { project: [:namespace], author: [] },
            columns_for_select: %i[title iid id created_at author_id project_id]
          },
          MergeRequest => {
            finder_class: MergeRequestsFinder,
            serializer_class: AnalyticsMergeRequestSerializer,
            includes_for_query: { target_project: [:namespace], author: [] },
            columns_for_select: %i[title iid id created_at author_id state target_project_id]
          }
        }.freeze

        delegate :subject_class, to: :stage

        def initialize(stage:, query:, params: {})
          @stage = stage
          @query = query
          @params = params
        end

        # Memoized list of serialized records for the stage.
        def serialized_records
          strong_memoize(:serialized_records) do
            # special case (legacy): 'Test' and 'Staging' stages should show Ci::Build records
            if default_test_stage? || default_staging_stage?
              builds = ci_build_records.map { |row| row['build'] }
              AnalyticsBuildSerializer.new.represent(builds)
            else
              records.map { |record| serialize_record(record) }
            end
          end
        end

        private

        attr_reader :stage, :query, :params

        # MAPPINGS entry for the stage's subject class.
        def mapping
          MAPPINGS.fetch(subject_class)
        end

        # Serializes one subject record, enriched with project/namespace paths
        # and the author.
        def serialize_record(record)
          project = record.project
          extra_attributes = {
            project_path: project.path,
            namespace_path: project.namespace.path,
            author: record.author
          }

          serializer.represent(record.attributes.merge(extra_attributes))
        end

        # Relation produced by the subject's finder for the current user.
        def finder_query
          finder_class = mapping.fetch(:finder_class)
          current_user = params.fetch(:current_user)

          finder_class.new(current_user, finder_params.fetch(stage.parent.class)).execute
        end

        # Arel attributes for the columns that are selected for the subject.
        def columns
          table = subject_class.arel_table

          mapping.fetch(:columns_for_select).map { |column_name| table[column_name] }
        end

        # EE will override this to include Group rules
        def finder_params
          {
            Project => { project_id: stage.parent_id }
          }
        end

        def default_test_stage?
          test_stage_params = Gitlab::Analytics::CycleAnalytics::DefaultStages.params_for_test_stage

          stage.matches_with_stage_params?(test_stage_params)
        end

        def default_staging_stage?
          staging_stage_params = Gitlab::Analytics::CycleAnalytics::DefaultStages.params_for_staging_stage

          stage.matches_with_stage_params?(staging_stage_params)
        end

        # A new serializer instance for the subject class.
        def serializer
          mapping.fetch(:serializer_class).new
        end

        # Loading Ci::Build records instead of MergeRequest records
        # rubocop: disable CodeReuse/ActiveRecord
        def ci_build_records
          build_join = mr_metrics_table
            .join(build_table)
            .on(mr_metrics_table[:pipeline_id].eq(build_table[:commit_id]))
            .join_sources

          builds_query = ordered_and_limited_query
            .joins(build_join)
            .select(build_table[:id], round_duration_to_seconds.as('total_time'))

          rows = execute_query(builds_query).to_a
          build_scope = ::Ci::Build.includes({ project: [:namespace], user: [], pipeline: [] })

          Gitlab::CycleAnalytics::Updater.update!(rows, from: 'id', to: 'build', klass: build_scope)
        end

        # Base query ordered by the end event timestamp (descending) and capped
        # at MAX_RECORDS rows.
        def ordered_and_limited_query
          newest_first = stage.end_event.timestamp_projection.desc

          query.reorder(newest_first).limit(MAX_RECORDS)
        end

        def records
          loaded = finder_query
            .merge(ordered_and_limited_query)
            .select(*columns, round_duration_to_seconds.as('total_time'))

          # using preloader instead of includes to avoid AR generating a large column list
          ActiveRecord::Associations::Preloader.new.preload(loaded, mapping.fetch(:includes_for_query))

          loaded
        end
        # rubocop: enable CodeReuse/ActiveRecord
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Analytics
    module CycleAnalytics
      # Query helpers shared by the stage query classes. The including class
      # must provide a `stage` method exposing `start_event` and `end_event`.
      module StageQueryHelpers
        # Runs the given relation as raw SQL on the ActiveRecord::Base
        # connection and returns whatever `execute` returns.
        def execute_query(query)
          connection = ActiveRecord::Base.connection
          connection.execute(query.to_sql)
        end

        # Arel node for CAST('0' AS INTERVAL).
        def zero_interval
          cast_argument = Arel.sql("'0' AS INTERVAL")
          Arel::Nodes::NamedFunction.new("CAST", [cast_argument])
        end

        # Arel node extracting the epoch (seconds) from the stage duration.
        def round_duration_to_seconds
          Arel::Nodes::Extract.new(duration, :epoch)
        end

        # Arel node for: end event timestamp - start event timestamp.
        def duration
          finished_at = stage.end_event.timestamp_projection
          started_at = stage.start_event.timestamp_projection

          Arel::Nodes::Subtraction.new(finished_at, started_at)
        end
      end
    end
  end
end
# frozen_string_literal: true
# Factory for Analytics::CycleAnalytics::ProjectStage records.
FactoryBot.define do
factory :cycle_analytics_project_stage, class: Analytics::CycleAnalytics::ProjectStage do
project
# Sequence-generated names: "Stage #<n>".
sequence(:name) { |n| "Stage ##{n}" }
hidden { false }
# Applies the :issue_stage trait (declared below) to every built stage.
issue_stage
# Start/end event identifiers taken from the IssueCreated and IssueStageEnd
# stage event classes.
trait :issue_stage do
start_event_identifier { Gitlab::Analytics::CycleAnalytics::StageEvents::IssueCreated.identifier }
end_event_identifier { Gitlab::Analytics::CycleAnalytics::StageEvents::IssueStageEnd.identifier }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises BaseQueryBuilder#build for a "merge request created -> merged"
# stage: filtering by the :from / :to params and scoping to the stage's project.
describe Gitlab::Analytics::CycleAnalytics::BaseQueryBuilder do
let_it_be(:project) { create(:project, :empty_repo) }
# Two merge requests targeting the project: one created 3 months ago, one
# created 1 month ago.
let_it_be(:mr1) { create(:merge_request, target_project: project, source_project: project, allow_broken: true, created_at: 3.months.ago) }
let_it_be(:mr2) { create(:merge_request, target_project: project, source_project: project, allow_broken: true, created_at: 1.month.ago) }
# Starts empty; examples add :from / :to before `records` is evaluated.
let(:params) { {} }
# Records returned by the builder for the stage under test.
let(:records) do
stage = build(:cycle_analytics_project_stage, {
start_event_identifier: :merge_request_created,
end_event_identifier: :merge_request_merged,
project: project
})
described_class.new(stage: stage, params: params).build.to_a
end
# Give both merge requests a merged_at that is not earlier than created_at.
before do
mr1.metrics.update!(merged_at: 1.month.ago)
mr2.metrics.update!(merged_at: Time.now)
end
around do |example|
Timecop.freeze { example.run }
end
describe 'date range parameters' do
context 'when filters by only the `from` parameter' do
before do
params[:from] = 4.months.ago
end
# Both merge requests were created after `from`.
it { expect(records.size).to eq(2) }
end
context 'when filters by both `from` and `to` parameters' do
before do
params.merge!(from: 4.months.ago, to: 2.months.ago)
end
# Only mr1 (created 3 months ago) falls inside the window.
it { expect(records.size).to eq(1) }
end
context 'invalid date range is provided' do
before do
params.merge!(from: 1.month.ago, to: 10.months.ago)
end
# `to` precedes `from`, so nothing can match.
it { expect(records.size).to eq(0) }
end
end
it 'scopes query within the target project' do
# A merged merge request whose source project is a different, newly created
# project; it is expected not to be counted.
other_mr = create(:merge_request, source_project: create(:project), allow_broken: true, created_at: 2.months.ago)
other_mr.metrics.update!(merged_at: 1.month.ago)
params[:from] = 1.year.ago
expect(records.size).to eq(2)
end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises RecordsFetcher#serialized_records through DataCollector for issue
# based stages, merge request based stages and the legacy build-based stages.
describe Gitlab::Analytics::CycleAnalytics::RecordsFetcher do
  around do |example|
    Timecop.freeze { example.run }
  end

  let_it_be(:project) { create(:project, :empty_repo) }
  let_it_be(:user) { create(:user) }

  # Serialized records for `stage` (defined per context) as seen by `user`.
  subject do
    Gitlab::Analytics::CycleAnalytics::DataCollector.new(
      stage: stage,
      params: {
        from: 1.year.ago,
        current_user: user
      }
    ).records_fetcher.serialized_records
  end

  describe '#serialized_records' do
    shared_context 'when records are loaded by maintainer' do
      before do
        project.add_user(user, Gitlab::Access::MAINTAINER)
      end

      it 'returns all records' do
        expect(subject.size).to eq(2)
      end
    end

    describe 'for issue based stage' do
      let_it_be(:issue1) { create(:issue, project: project) }
      let_it_be(:issue2) { create(:issue, project: project, confidential: true) }

      let(:stage) do
        build(:cycle_analytics_project_stage, {
          start_event_identifier: :plan_stage_start,
          end_event_identifier: :issue_first_mentioned_in_commit,
          project: project
        })
      end

      before do
        # `update!` so that a failed save aborts the setup instead of leaving
        # the metrics silently unchanged.
        issue1.metrics.update!(first_added_to_board_at: 3.days.ago, first_mentioned_in_commit_at: 2.days.ago)
        issue2.metrics.update!(first_added_to_board_at: 3.days.ago, first_mentioned_in_commit_at: 2.days.ago)
      end

      context 'when records are loaded by guest' do
        before do
          project.add_user(user, Gitlab::Access::GUEST)
        end

        it 'filters out confidential issues' do
          expect(subject.size).to eq(1)
          expect(subject.first[:iid].to_s).to eq(issue1.iid.to_s)
        end
      end

      include_context 'when records are loaded by maintainer'
    end

    describe 'for merge request based stage' do
      let(:mr1) { create(:merge_request, created_at: 5.days.ago, source_project: project, allow_broken: true) }
      let(:mr2) { create(:merge_request, created_at: 4.days.ago, source_project: project, allow_broken: true) }

      let(:stage) do
        build(:cycle_analytics_project_stage, {
          start_event_identifier: :merge_request_created,
          end_event_identifier: :merge_request_merged,
          project: project
        })
      end

      before do
        # `update!` so that a failed save aborts the setup instead of leaving
        # the metrics silently unchanged.
        mr1.metrics.update!(merged_at: 3.days.ago)
        mr2.metrics.update!(merged_at: 3.days.ago)
      end

      include_context 'when records are loaded by maintainer'
    end

    describe 'special case' do
      let(:mr1) { create(:merge_request, source_project: project, allow_broken: true, created_at: 20.days.ago) }
      let(:mr2) { create(:merge_request, source_project: project, allow_broken: true, created_at: 19.days.ago) }
      let(:ci_build1) { create(:ci_build) }
      let(:ci_build2) { create(:ci_build) }
      let(:default_stages) { Gitlab::Analytics::CycleAnalytics::DefaultStages }
      let(:stage) { build(:cycle_analytics_project_stage, default_stages.params_for_test_stage.merge(project: project)) }

      before do
        # Link each merge request's metrics to the pipeline of one of the builds.
        mr1.metrics.update!({
          merged_at: 5.days.ago,
          first_deployed_to_production_at: 1.day.ago,
          latest_build_started_at: 5.days.ago,
          latest_build_finished_at: 1.day.ago,
          pipeline: ci_build1.pipeline
        })
        mr2.metrics.update!({
          merged_at: 10.days.ago,
          first_deployed_to_production_at: 5.days.ago,
          latest_build_started_at: 9.days.ago,
          latest_build_finished_at: 7.days.ago,
          pipeline: ci_build2.pipeline
        })
      end

      context 'returns build records' do
        shared_examples 'orders build records by `latest_build_finished_at`' do
          it 'orders by `latest_build_finished_at`' do
            build_ids = subject.map { |item| item[:id] }

            expect(build_ids).to eq([ci_build1.id, ci_build2.id])
          end
        end

        context 'when requesting records for default test stage' do
          include_examples 'orders build records by `latest_build_finished_at`'
        end

        context 'when requesting records for default staging stage' do
          before do
            stage.assign_attributes(default_stages.params_for_staging_stage)
          end

          include_examples 'orders build records by `latest_build_finished_at`'
        end
      end
    end
  end
end
......@@ -26,6 +26,13 @@ describe Gitlab::CycleAnalytics::CodeStage do
it_behaves_like 'base stage'
context 'when using the new query backend' do
include_examples 'Gitlab::Analytics::CycleAnalytics::DataCollector backend examples' do
let(:expected_record_count) { 2 }
let(:expected_ordered_attribute_values) { [mr_2.title, mr_1.title] }
end
end
describe '#project_median' do
around do |example|
Timecop.freeze { example.run }
......
......@@ -21,6 +21,13 @@ describe Gitlab::CycleAnalytics::IssueStage do
it_behaves_like 'base stage'
context 'when using the new query backend' do
include_examples 'Gitlab::Analytics::CycleAnalytics::DataCollector backend examples' do
let(:expected_record_count) { 3 }
let(:expected_ordered_attribute_values) { [issue_3.title, issue_2.title, issue_1.title] }
end
end
describe '#median' do
around do |example|
Timecop.freeze { example.run }
......
......@@ -21,6 +21,13 @@ describe Gitlab::CycleAnalytics::PlanStage do
it_behaves_like 'base stage'
context 'when using the new query backend' do
include_examples 'Gitlab::Analytics::CycleAnalytics::DataCollector backend examples' do
let(:expected_record_count) { 2 }
let(:expected_ordered_attribute_values) { [issue_1.title, issue_2.title] }
end
end
describe '#project_median' do
around do |example|
Timecop.freeze { example.run }
......
......@@ -52,3 +52,21 @@ shared_examples 'calculate #median with date range' do
it { expect(stage.project_median).to eq(nil) }
end
end
# Shared examples comparing the DataCollector backend with the expectations of
# the existing stage specs.
#
# The including group is expected to provide `stage_name`, `project`,
# `stage_options` and `expected_ordered_attribute_values`; it may override
# `attribute_to_verify` (defaults to :title).
#
# NOTE(review): includers also define `expected_record_count`, but nothing in
# this block reads it - confirm whether a size assertion was intended.
shared_examples 'Gitlab::Analytics::CycleAnalytics::DataCollector backend examples' do
# Default stage params looked up by name ("params_for_<stage_name>_stage") and bound to the project.
let(:stage_params) { Gitlab::Analytics::CycleAnalytics::DefaultStages.send("params_for_#{stage_name}_stage").merge(project: project) }
let(:stage) { Analytics::CycleAnalytics::ProjectStage.new(stage_params) }
let(:data_collector) { Gitlab::Analytics::CycleAnalytics::DataCollector.new(stage: stage, params: { from: stage_options[:from], current_user: project.creator }) }
# Key read from each serialized record when comparing ordering.
let(:attribute_to_verify) { :title }
context 'provides the same results as the old implementation' do
it 'for the median' do
expect(data_collector.median.seconds).to eq(ISSUES_MEDIAN)
end
it 'for the list of event records' do
records = data_collector.records_fetcher.serialized_records
expect(records.map { |event| event[attribute_to_verify] }).to eq(expected_ordered_attribute_values)
end
end
end
......@@ -12,17 +12,20 @@ describe Gitlab::CycleAnalytics::TestStage do
it_behaves_like 'base stage'
describe '#median' do
let(:mr_1) { create(:merge_request, :closed, source_project: project, created_at: 60.minutes.ago) }
let(:mr_2) { create(:merge_request, :closed, source_project: project, created_at: 40.minutes.ago, source_branch: 'A') }
let(:mr_3) { create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'B') }
let(:mr_4) { create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'C') }
let(:mr_5) { create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'D') }
let(:ci_build1) { create(:ci_build, project: project) }
let(:ci_build2) { create(:ci_build, project: project) }
before do
issue_1 = create(:issue, project: project, created_at: 90.minutes.ago)
issue_2 = create(:issue, project: project, created_at: 60.minutes.ago)
issue_3 = create(:issue, project: project, created_at: 60.minutes.ago)
mr_1 = create(:merge_request, :closed, source_project: project, created_at: 60.minutes.ago)
mr_2 = create(:merge_request, :closed, source_project: project, created_at: 40.minutes.ago, source_branch: 'A')
mr_3 = create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'B')
mr_4 = create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'C')
mr_5 = create(:merge_request, source_project: project, created_at: 10.minutes.ago, source_branch: 'D')
mr_1.metrics.update!(latest_build_started_at: 32.minutes.ago, latest_build_finished_at: 2.minutes.ago)
mr_2.metrics.update!(latest_build_started_at: 62.minutes.ago, latest_build_finished_at: 32.minutes.ago)
mr_1.metrics.update!(latest_build_started_at: 32.minutes.ago, latest_build_finished_at: 2.minutes.ago, pipeline_id: ci_build1.commit_id)
mr_2.metrics.update!(latest_build_started_at: 62.minutes.ago, latest_build_finished_at: 32.minutes.ago, pipeline_id: ci_build2.commit_id)
mr_3.metrics.update!(latest_build_started_at: nil, latest_build_finished_at: nil)
mr_4.metrics.update!(latest_build_started_at: nil, latest_build_finished_at: nil)
mr_5.metrics.update!(latest_build_started_at: nil, latest_build_finished_at: nil)
......@@ -43,5 +46,13 @@ describe Gitlab::CycleAnalytics::TestStage do
end
include_examples 'calculate #median with date range'
context 'when using the new query backend' do
include_examples 'Gitlab::Analytics::CycleAnalytics::DataCollector backend examples' do
let(:expected_record_count) { 2 }
let(:attribute_to_verify) { :id }
let(:expected_ordered_attribute_values) { [mr_1.metrics.pipeline.builds.first.id, mr_2.metrics.pipeline.builds.first.id] }
end
end
end
end
......@@ -55,11 +55,11 @@ shared_examples_for 'cycle analytics stage' do
end
end
describe '#subject_model' do
describe '#subject_class' do
it 'infers the model from the start event' do
stage = described_class.new(valid_params)
expect(stage.subject_model).to eq(MergeRequest)
expect(stage.subject_class).to eq(MergeRequest)
end
end
......@@ -78,4 +78,30 @@ shared_examples_for 'cycle analytics stage' do
expect(stage.end_event).to be_a_kind_of(Gitlab::Analytics::CycleAnalytics::StageEvents::MergeRequestMerged)
end
end
# A stage matches a set of default stage params only when it is itself a
# default (non-custom) stage.
describe '#matches_with_stage_params?' do
  let(:params) { Gitlab::Analytics::CycleAnalytics::DefaultStages.params_for_test_stage }

  it 'matches with default stage params' do
    default_stage = described_class.new(params)

    expect(default_stage).to be_default_stage
    expect(default_stage).to be_matches_with_stage_params(params)
  end

  it 'mismatches when the stage is custom' do
    custom_params = params.merge(custom: true)
    custom_stage = described_class.new(custom_params)

    expect(custom_stage).not_to be_default_stage
    expect(custom_stage).not_to be_matches_with_stage_params(params)
  end
end
# `parent_id` should return the id of whatever object was assigned as `parent`.
describe '#parent_id' do
  it "delegates to 'parent_name'_id attribute" do
    stage_with_parent = described_class.new(parent: parent)

    expect(stage_with_parent.parent_id).to eq(parent.id)
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment