Commit 9c75fcfb authored by Kerri Miller

Merge branch...

Merge branch '299407-dora4-api-support-for-change-failure-rate-backend-service-part-2' into 'master'

Refactor DORA metrics structure

See merge request gitlab-org/gitlab!83862
parents 4633f6a6 7674ba51
......@@ -5,9 +5,9 @@ module Types
graphql_name 'DoraMetricType'
description 'All supported DORA metric types.'
value 'DEPLOYMENT_FREQUENCY', description: 'Deployment frequency.', value: Dora::DailyMetrics::METRIC_DEPLOYMENT_FREQUENCY
value 'LEAD_TIME_FOR_CHANGES', description: 'Lead time for changes.', value: Dora::DailyMetrics::METRIC_LEAD_TIME_FOR_CHANGES
value 'TIME_TO_RESTORE_SERVICE', description: 'Time to restore service.', value: Dora::DailyMetrics::METRIC_TIME_TO_RESTORE_SERVICE
value 'CHANGE_FAILURE_RATE', description: 'Change failure rate.', value: Dora::DailyMetrics::METRIC_CHANGE_FAILURE_RATE
value 'DEPLOYMENT_FREQUENCY', description: 'Deployment frequency.', value: Dora::DeploymentFrequencyMetric::METRIC_NAME
value 'LEAD_TIME_FOR_CHANGES', description: 'Lead time for changes.', value: Dora::LeadTimeForChangesMetric::METRIC_NAME
value 'TIME_TO_RESTORE_SERVICE', description: 'Time to restore service.', value: Dora::TimeToRestoreServiceMetric::METRIC_NAME
value 'CHANGE_FAILURE_RATE', description: 'Change failure rate.', value: Dora::ChangeFailureRateMetric::METRIC_NAME
end
end
# frozen_string_literal: true
module Dora
  # Mixin included by the concrete DORA metric classes. It provides the
  # registry of metric classes, a lookup by metric name, and the deployment
  # filter that is shared between metric queries.
  module BaseMetric
    # Finds the metric class whose METRIC_NAME equals +metric_key+.
    #
    # @param metric_key [String] metric name to look up
    # @return [Class] the matching metric class
    # @raise [ArgumentError] when no registered class has that name
    def self.for(metric_key)
      all_metric_classes.each do |metric_class|
        return metric_class if metric_class::METRIC_NAME == metric_key
      end

      raise ArgumentError, 'Unknown metric'
    end

    # @return [Array<Class>] every metric class known to this registry
    def self.all_metric_classes
      [
        DeploymentFrequencyMetric,
        LeadTimeForChangesMetric,
        TimeToRestoreServiceMetric,
        ChangeFailureRateMetric
      ]
    end

    # @param environment the environment the metric is computed for
    # @param date the day the metric is computed for
    def initialize(environment, date)
      @environment = environment
      @date = date
    end

    # Maps daily-metrics columns to the SQL used to compute their values,
    # so the results can be stored in daily metrics later on.
    # Including classes are expected to provide their own implementation.
    #
    # @raise [NoMethodError] always, unless overridden
    def data_queries
      raise NoMethodError, "method `data_queries` must be overloaded for #{self.class.name}"
    end

    private

    attr_reader :environment, :date

    # Arel condition selecting successful deployments of the environment
    # that finished between the beginning and the end of the date.
    def eligible_deployments
      table = Deployment.arel_table

      same_environment = table[:environment_id].eq(environment.id)
      finished_after   = table[:finished_at].gteq(date.beginning_of_day)
      finished_before  = table[:finished_at].lteq(date.end_of_day)
      succeeded        = table[:status].eq(Deployment.statuses[:success])

      same_environment.and(finished_after).and(finished_before).and(succeeded)
    end
  end
end
# frozen_string_literal: true
module Dora
  # "Change failure rate" metric: relates the number of incidents to the
  # number of deployments.
  class ChangeFailureRateMetric
    include BaseMetric

    METRIC_NAME = 'change_failure_rate'

    class << self
      # SQL expression aggregating stored daily rows into the metric value.
      # GREATEST(..., 1) keeps the divisor from being zero.
      def calculation_query
        'SUM(incidents_count)::float / GREATEST(SUM(deployment_frequency), 1)'
      end
    end

    # Returns the deployment frequency queries plus an +incidents_count+
    # query, or an empty hash for non-production environments.
    def data_queries
      # Non-production environments are ignored as we assume all Incidents happen on production
      # See https://gitlab.com/gitlab-org/gitlab/-/issues/299096#note_550275633 for details
      return {} unless environment.production?

      DeploymentFrequencyMetric.new(environment, date).data_queries.tap do |queries|
        queries[:incidents_count] = incidents_count_sql
      end
    end

    private

    # SQL counting incident issues of the environment's project that were
    # created between the beginning and the end of the date.
    def incidents_count_sql
      Issue.incident
        .select(Issue.arel_table[:id].count)
        .where(created_at: date.beginning_of_day..date.end_of_day)
        .where(project_id: environment.project_id)
        .to_sql
    end
  end
end
......@@ -13,12 +13,8 @@ module Dora
INTERVAL_ALL = 'all'
INTERVAL_MONTHLY = 'monthly'
INTERVAL_DAILY = 'daily'
METRIC_DEPLOYMENT_FREQUENCY = 'deployment_frequency'
METRIC_LEAD_TIME_FOR_CHANGES = 'lead_time_for_changes'
METRIC_TIME_TO_RESTORE_SERVICE = 'time_to_restore_service'
METRIC_CHANGE_FAILURE_RATE = 'change_failure_rate'
AVAILABLE_METRICS = [METRIC_DEPLOYMENT_FREQUENCY, METRIC_LEAD_TIME_FOR_CHANGES, METRIC_TIME_TO_RESTORE_SERVICE, METRIC_CHANGE_FAILURE_RATE].freeze
AVAILABLE_INTERVALS = [INTERVAL_ALL, INTERVAL_MONTHLY, INTERVAL_DAILY].freeze
AVAILABLE_METRICS = BaseMetric.all_metric_classes.map { |klass| klass::METRIC_NAME }.freeze
scope :for_environments, -> (environments) do
where(environment: environments)
......@@ -29,54 +25,19 @@ module Dora
end
class << self
def refresh!(environment, date)
raise ArgumentError unless environment.is_a?(::Environment) && date.is_a?(Date)
deployment_frequency = deployment_frequency(environment, date)
lead_time_for_changes = lead_time_for_changes(environment, date)
time_to_restore_service = time_to_restore_service(environment, date)
incidents_count = incidents_count(environment, date)
# This query is concurrent safe upsert with the unique index.
connection.execute(<<~SQL)
INSERT INTO #{table_name} (
environment_id,
date,
deployment_frequency,
lead_time_for_changes_in_seconds,
time_to_restore_service_in_seconds,
incidents_count
)
VALUES (
#{environment.id},
#{connection.quote(date.to_s)},
(#{deployment_frequency}),
(#{lead_time_for_changes}),
(#{time_to_restore_service}),
(#{incidents_count})
)
ON CONFLICT (environment_id, date)
DO UPDATE SET
deployment_frequency = (#{deployment_frequency}),
lead_time_for_changes_in_seconds = (#{lead_time_for_changes}),
time_to_restore_service_in_seconds = (#{time_to_restore_service}),
incidents_count = (#{incidents_count})
SQL
end
def aggregate_for!(metric, interval)
data_query = data_query_for!(metric)
query = "#{BaseMetric.for(metric).calculation_query} as data"
case interval
when INTERVAL_ALL
select(data_query).take.data
select(query).take.data
when INTERVAL_MONTHLY
select("DATE_TRUNC('month', date)::date AS month, #{data_query}")
select("DATE_TRUNC('month', date)::date AS month, #{query}")
.group("DATE_TRUNC('month', date)")
.order('month ASC')
.map { |row| { 'date' => row.month.to_s, 'value' => row.data } }
when INTERVAL_DAILY
select("date, #{data_query}")
select("date, #{query}")
.group('date')
.order('date ASC')
.map { |row| { 'date' => row.date.to_s, 'value' => row.data } }
......@@ -85,87 +46,31 @@ module Dora
end
end
private
def refresh!(environment, date)
raise ArgumentError unless environment.is_a?(::Environment) && date.is_a?(Date)
def data_query_for!(metric)
case metric
when METRIC_DEPLOYMENT_FREQUENCY
'SUM(deployment_frequency) AS data'
when METRIC_LEAD_TIME_FOR_CHANGES
# Median
'(PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY lead_time_for_changes_in_seconds)) AS data'
when METRIC_TIME_TO_RESTORE_SERVICE
# Median
'(PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY time_to_restore_service_in_seconds)) AS data'
when METRIC_CHANGE_FAILURE_RATE
'SUM(incidents_count)::float / GREATEST(SUM(deployment_frequency), 1) as data'
else
raise ArgumentError, 'Unknown metric'
queries_to_refresh = BaseMetric.all_metric_classes.inject({}) do |queries, klass|
queries.merge(klass.new(environment, date).data_queries)
end
end
# Compose a query to calculate "Deployment Frequency" of the date
def deployment_frequency(environment, date)
deployments = Deployment.arel_table
deployments
.project(deployments[:id].count)
.where(eligible_deployments(environment, date))
.to_sql
end
# Compose a query to calculate "Lead Time for Changes" of the date
def lead_time_for_changes(environment, date)
deployments = Deployment.arel_table
deployment_merge_requests = DeploymentMergeRequest.arel_table
merge_request_metrics = MergeRequest::Metrics.arel_table
return unless queries_to_refresh.present?
deployments
.project(
Arel.sql(
'PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY EXTRACT(EPOCH FROM (deployments.finished_at - merge_request_metrics.merged_at)))'
)
)
.join(deployment_merge_requests).on(
deployment_merge_requests[:deployment_id].eq(deployments[:id])
)
.join(merge_request_metrics).on(
merge_request_metrics[:merge_request_id].eq(deployment_merge_requests[:merge_request_id])
# This query is concurrent safe upsert with the unique index.
connection.execute(<<~SQL)
INSERT INTO #{table_name} (
environment_id,
date,
#{queries_to_refresh.keys.join(', ')}
)
.where(eligible_deployments(environment, date))
.to_sql
end
def eligible_deployments(environment, date)
deployments = Deployment.arel_table
[deployments[:environment_id].eq(environment.id),
deployments[:finished_at].gteq(date.beginning_of_day),
deployments[:finished_at].lteq(date.end_of_day),
deployments[:status].eq(Deployment.statuses[:success])].reduce(&:and)
end
def time_to_restore_service(environment, date)
# Non-production environments are ignored as we assume all Incidents happen on production
# See https://gitlab.com/gitlab-org/gitlab/-/issues/299096#note_550275633 for details
return Arel.sql('NULL') unless environment.production?
Issue.incident.closed.select(
Arel.sql(
'PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY EXTRACT(EPOCH FROM (issues.closed_at - issues.created_at)))'
VALUES (
#{environment.id},
#{connection.quote(date.to_s)},
#{queries_to_refresh.map { |_column, query| "(#{query})"}.join(', ')}
)
).where("closed_at >= ? AND closed_at <= ?", date.beginning_of_day, date.end_of_day)
.where(project_id: environment.project_id)
.to_sql
end
def incidents_count(environment, date)
return Arel.sql('NULL') unless environment.production?
Issue.incident.select(Issue.arel_table[:id].count)
.where(created_at: date.beginning_of_day..date.end_of_day)
.where(project_id: environment.project_id)
.to_sql
ON CONFLICT (environment_id, date)
DO UPDATE SET
#{queries_to_refresh.map { |column, query| "#{column} = (#{query})"}.join(', ')}
SQL
end
end
end
......
# frozen_string_literal: true
module Dora
  # "Deployment frequency" metric: counts eligible deployments.
  class DeploymentFrequencyMetric
    include BaseMetric

    METRIC_NAME = 'deployment_frequency'

    class << self
      # SQL expression aggregating stored daily rows into the metric value.
      def calculation_query
        'SUM(deployment_frequency)'
      end
    end

    # Returns a single-entry hash mapping the +deployment_frequency+ column
    # to the SQL that counts the deployments matching +eligible_deployments+.
    def data_queries
      table = Deployment.arel_table
      counting_query = table.project(table[:id].count).where(eligible_deployments)

      { deployment_frequency: counting_query.to_sql }
    end
  end
end
# frozen_string_literal: true
module Dora
  # "Lead time for changes" metric: median time between a merge request
  # being merged and the deployment that shipped it finishing.
  class LeadTimeForChangesMetric
    include BaseMetric

    METRIC_NAME = 'lead_time_for_changes'

    class << self
      # SQL expression aggregating stored daily rows into the metric value
      # (the median of the stored per-day values).
      def calculation_query
        '(PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY lead_time_for_changes_in_seconds))'
      end
    end

    # Returns a single-entry hash mapping +lead_time_for_changes_in_seconds+
    # to the SQL computing the median, in seconds, over the merge requests
    # joined to the deployments matching +eligible_deployments+.
    def data_queries
      deployments_table = Deployment.arel_table
      links_table = DeploymentMergeRequest.arel_table
      metrics_table = MergeRequest::Metrics.arel_table

      median_seconds = Arel.sql(
        'PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY EXTRACT(EPOCH FROM (deployments.finished_at - merge_request_metrics.merged_at)))'
      )
      link_to_deployment = links_table[:deployment_id].eq(deployments_table[:id])
      link_to_metrics = metrics_table[:merge_request_id].eq(links_table[:merge_request_id])

      sql = deployments_table
        .project(median_seconds)
        .join(links_table).on(link_to_deployment)
        .join(metrics_table).on(link_to_metrics)
        .where(eligible_deployments)
        .to_sql

      { lead_time_for_changes_in_seconds: sql }
    end
  end
end
# frozen_string_literal: true
module Dora
  # "Time to restore service" metric: median duration of closed incidents.
  class TimeToRestoreServiceMetric
    include BaseMetric

    METRIC_NAME = 'time_to_restore_service'

    class << self
      # SQL expression aggregating stored daily rows into the metric value
      # (the median of the stored per-day values).
      def calculation_query
        '(PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY time_to_restore_service_in_seconds))'
      end
    end

    # Returns a single-entry hash mapping +time_to_restore_service_in_seconds+
    # to the SQL computing the median, in seconds, between creation and
    # closing of the project's incidents closed on the date. Returns an empty
    # hash for non-production environments.
    def data_queries
      # Non-production environments are ignored as we assume all Incidents happen on production
      # See https://gitlab.com/gitlab-org/gitlab/-/issues/299096#note_550275633 for details
      return {} unless environment.production?

      median_seconds = Arel.sql(
        'PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY EXTRACT(EPOCH FROM (issues.closed_at - issues.created_at)))'
      )

      sql = Issue.incident.closed
        .select(median_seconds)
        .where(closed_at: date.beginning_of_day..date.end_of_day)
        .where(project_id: environment.project_id)
        .to_sql

      { time_to_restore_service_in_seconds: sql }
    end
  end
end
......@@ -25,7 +25,7 @@ module EE
::Dora::DailyMetrics::RefreshWorker
.perform_in(5.minutes,
deployment.environment_id,
Time.current.to_date.to_s)
deployment.finished_at.to_date.to_s)
end
end
end
......
......@@ -14,7 +14,7 @@ module Dora
queue_namespace :dora_metrics
feature_category :continuous_delivery
def perform(environment_id, date)
def perform(environment_id, date, _event = nil)
Environment.find_by_id(environment_id).try do |environment|
::Dora::DailyMetrics.refresh!(environment, Date.parse(date))
end
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the metric class registry, the lookup by metric name, and the
# default #data_queries implementation of Dora::BaseMetric.
RSpec.describe Dora::BaseMetric do
  describe '.all_metric_classes' do
    it 'returns list of 4 metric classes' do
      expect(described_class.all_metric_classes)
        .to match_array(
          [Dora::DeploymentFrequencyMetric,
           Dora::LeadTimeForChangesMetric,
           Dora::TimeToRestoreServiceMetric,
           Dora::ChangeFailureRateMetric]
        )
    end
  end

  describe '.for' do
    it 'returns metric class by its metric name' do
      # Iterate with `.each`: `all_metric_classes` itself never yields, so a
      # block handed directly to it is silently ignored and nothing is asserted.
      described_class.all_metric_classes.each do |klass|
        expect(described_class.for(klass::METRIC_NAME)).to eq(klass)
      end
    end

    it 'raises error if there is no defined metric class' do
      expect { described_class.for('this-is-not-a-metric-key') }.to raise_error(ArgumentError, 'Unknown metric')
    end
  end

  describe '#data_queries' do
    # A plain object extended with the module, i.e. one that does not
    # provide its own #data_queries.
    subject do
      Object.new.tap do |obj|
        obj.extend described_class
      end
    end

    it 'raises a requirement to overload the method' do
      expect { subject.data_queries }.to raise_error(NoMethodError, "method `data_queries` must be overloaded for #{subject.class.name}")
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the queries produced by Dora::ChangeFailureRateMetric#data_queries.
RSpec.describe Dora::ChangeFailureRateMetric do
  describe '#data_queries' do
    let_it_be(:project) { create(:project, :repository) }
    let_it_be(:environment) { create(:environment, :production, project: project) }
    let_it_be(:date) { 1.day.ago }

    subject(:data_queries) { described_class.new(environment, date.to_date).data_queries }

    around { |example| freeze_time { example.run } }

    before_all do
      # Incidents created within the day: expected to be counted
      [date.beginning_of_day, date.beginning_of_day + 1.hour, date.end_of_day].each do |created_at|
        create(:incident, project: project, created_at: created_at)
      end

      # Issues which shouldn't be included in calculation
      create(:issue, project: project, created_at: date) # not an incident
      create(:incident, created_at: date) # different project
      create(:incident, project: project, created_at: date - 1.year) # different date
      create(:incident, project: project, created_at: date + 1.year) # different date
    end

    context 'for production environment' do
      it 'returns number of incidents opened at given date' do
        expect(data_queries.size).to eq(2)

        result = Issue.connection.execute(data_queries[:incidents_count])

        expect(result.first['count']).to be(3)
      end

      it 'inherits data queries from DeploymentFrequency metric' do
        allow_next_instance_of(Dora::DeploymentFrequencyMetric) do |frequency_metric|
          allow(frequency_metric).to receive(:data_queries).and_return({ deployment_frequency: 12345 })
        end

        expect(data_queries[:deployment_frequency]).to eq(12345)
      end
    end

    context 'for non-production environment' do
      let_it_be(:environment) { create(:environment, project: project) }

      it 'returns no queries' do
        expect(data_queries.size).to eq(0)
      end
    end
  end
end
This diff is collapsed.
# frozen_string_literal: true
require 'spec_helper'
# Specs for the query produced by Dora::DeploymentFrequencyMetric#data_queries.
RSpec.describe Dora::DeploymentFrequencyMetric do
  describe '#data_queries' do
    let_it_be(:project) { create(:project, :repository) }
    let_it_be(:environment) { create(:environment, project: project) }
    let_it_be(:date) { 1.day.ago }

    subject(:data_queries) { described_class.new(environment, date.to_date).data_queries }

    around { |example| freeze_time { example.run } }

    it 'returns number of finished successful deployments' do
      # Matching deployments
      [date.beginning_of_day, date, date.end_of_day].each do |finished_at|
        create(:deployment, :success, environment: environment, finished_at: finished_at)
      end

      # Not matching deployments
      create(:deployment, :failed, environment: environment, finished_at: date) # failed deployment
      create(:deployment, :success, environment: environment, finished_at: date - 1.day) # different day
      create(:deployment, :success, environment: environment, finished_at: date + 1.day) # different day
      create(:deployment, :success, finished_at: date + 1.day) # different environment

      expect(data_queries.size).to eq(1)

      result = Deployment.connection.execute(data_queries[:deployment_frequency])

      expect(result.first['count']).to be(3)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the query produced by Dora::LeadTimeForChangesMetric#data_queries.
RSpec.describe Dora::LeadTimeForChangesMetric do
  describe '#data_queries' do
    let_it_be(:project) { create(:project, :repository) }
    let_it_be(:environment) { create(:environment, project: project) }
    let_it_be(:date) { 1.day.ago }

    subject(:data_queries) { described_class.new(environment, date.to_date).data_queries }

    around { |example| freeze_time { example.run } }

    it 'returns median of time between merge and deployment' do
      # Merged merge request that is not attached to the deployment below
      create(:merge_request, :with_merged_metrics, project: project, merged_at: date - 1.day)

      deployed_merge_requests = [1.day, 2.days, 5.days].map do |age|
        create(:merge_request, :with_merged_metrics, project: project, merged_at: date - age)
      end

      # Deployment finished on the date
      create(:deployment, :success, environment: environment, finished_at: date, merge_requests: deployed_merge_requests)

      expect(data_queries.size).to eq(1)

      result = Deployment.connection.execute(data_queries[:lead_time_for_changes_in_seconds])

      expect(result.first['percentile_cont']).to eql(2.days.to_f)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the query produced by Dora::TimeToRestoreServiceMetric#data_queries.
RSpec.describe Dora::TimeToRestoreServiceMetric do
  describe '#data_queries' do
    let_it_be(:project) { create(:project, :repository) }
    let_it_be(:environment) { create(:environment, :production, project: project) }
    let_it_be(:date) { 1.day.ago }

    subject(:data_queries) { described_class.new(environment, date.to_date).data_queries }

    around { |example| freeze_time { example.run } }

    before_all do
      # Incidents closed on the date, open for 7, 5, 3 and 1 day(s)
      [7.days, 5.days, 3.days, 1.day].each do |duration|
        create(:incident, :closed, project: project, created_at: date - duration, closed_at: date)
      end

      # Issues which shouldn't be included in calculation
      create(:issue, :closed, project: project, created_at: date - 1.year, closed_at: date) # not an incident
      create(:incident, project: project, created_at: date - 1.year) # not closed yet
      create(:incident, :closed, created_at: date - 1.year, closed_at: date) # different project
      create(:incident, :closed, project: project, created_at: date - 1.year, closed_at: date + 1.day) # different date
    end

    context 'for production environment' do
      it 'returns median of incidents duration closed at given date' do
        expect(data_queries.size).to eq(1)

        result = Issue.connection.execute(data_queries[:time_to_restore_service_in_seconds])

        expect(result.first['percentile_cont']).to eql(4.days.to_f)
      end
    end

    context 'for non-production environment' do
      let_it_be(:environment) { create(:environment, project: project) }

      it 'does not calculate time_to_restore_service daily metric' do
        expect(data_queries.size).to eq(0)
      end
    end
  end
end
......@@ -65,11 +65,12 @@ FactoryBot.define do
transient do
merged_by { author }
merged_at { nil }
end
after(:build) do |merge_request, evaluator|
metrics = merge_request.build_metrics
metrics.merged_at = 1.week.from_now
metrics.merged_at = evaluator.merged_at || 1.week.from_now
metrics.merged_by = evaluator.merged_by
metrics.pipeline = create(:ci_empty_pipeline)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment