Commit 8957293d authored by Timothy Andrew's avatar Timothy Andrew

Implement review comments from @yorickpeterse

1. Change multiple updates to a single `update_all`

2. Use cascading deletes

3. Extract an average function for the database median.

4. Move database median to `lib/gitlab/database`

5. Use `delete_all` instead of `destroy_all`

6. Minor refactoring
parent fa890604
......@@ -57,15 +57,13 @@ module Ci
end
after_transition [:created, :pending] => :running do |pipeline|
pipeline.merge_requests.each do |merge_request|
merge_request.metrics.record_latest_build_start_time!(pipeline.started_at) if merge_request.metrics.present?
end
MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)).
update_all(latest_build_started_at: pipeline.started_at, latest_build_finished_at: nil)
end
after_transition any => [:success] do |pipeline|
pipeline.merge_requests.each do |merge_request|
merge_request.metrics.record_latest_build_finish_time!(pipeline.finished_at) if merge_request.metrics.present?
end
MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)).
update_all(latest_build_finished_at: pipeline.finished_at)
end
before_transition do |pipeline|
......@@ -292,8 +290,11 @@ module Ci
# Merge requests for which the current pipeline is running against
# the merge request's latest commit.
def merge_requests
project.merge_requests.where(source_branch: self.ref).
select { |merge_request| merge_request.pipeline.try(:id) == self.id }
@merge_requests ||=
begin
project.merge_requests.where(source_branch: self.ref).
select { |merge_request| merge_request.pipeline.try(:id) == self.id }
end
end
private
......
# https://www.periscopedata.com/blog/medians-in-sql.html
module DatabaseMedian
extend ActiveSupport::Concern
def median_datetime(arel_table, query_so_far, column_sym)
case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL'
pg_median_datetime(arel_table, query_so_far, column_sym)
when 'Mysql2'
mysql_median_datetime(arel_table, query_so_far, column_sym)
else
raise NotImplementedError, "We haven't implemented a database median strategy for your database type."
end
end
def mysql_median_datetime(arel_table, query_so_far, column_sym)
query = arel_table.
from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
project(Arel::Nodes::NamedFunction.new("AVG", [arel_table[column_sym]]).as('median')).
where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"),
Arel::Nodes::And.new([Arel.sql('@ct/2.0'),
Arel.sql('@ct/2.0 + 1')]))).
# Disallow negative values
where(arel_table[column_sym].gteq(0))
[Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
Arel.sql("set @row_id := 0;"),
query,
Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")]
end
def pg_median_datetime(arel_table, query_so_far, column_sym)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
# column, the CTE might look like this:
#
# star_count | row_id | ct
# ------------+--------+----
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(cte_table,
arel_table.
project(arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
arel_table.project("COUNT(1)").as('ct')).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table.project(Arel::Nodes::NamedFunction.new("AVG",
[extract_epoch(cte_table[column_sym])],
"median")).
where(Arel::Nodes::Between.new(cte_table[:row_id],
Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]))).
with(query_so_far, cte)
end
private
def extract_epoch(arel_attribute)
Arel.sql("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")")
end
# Need to cast '0' to an INTERVAL before we can check if the interval is positive
def zero_interval
Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
end
end
class CycleAnalytics
include DatabaseMedian
include Gitlab::Database::Median
def initialize(project, from:)
@project = project
......@@ -122,7 +122,9 @@ class CycleAnalytics
case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL'
result.first['median'].to_f
result = result.first.presence
median = result['median'] if result
median.to_f if median
when 'Mysql2'
result.to_a.flatten.first
end
......
class CycleAnalytics
class Queries
def initialize(project)
@project = project
end
def issues(options = {})
@issues_data ||=
begin
issues_query(options).preload(:metrics).map { |issue| { issue: issue } }
end
end
def merge_requests_closing_issues(options = {})
@merge_requests_closing_issues_data ||=
begin
merge_requests_closing_issues = MergeRequestsClosingIssues.where(issue: issues_query(options)).preload(issue: [:metrics], merge_request: [:metrics])
merge_requests_closing_issues.map do |record|
{ issue: record.issue, merge_request: record.merge_request }
end
end
end
def issue_first_associated_with_milestone_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_associated_with_milestone_at if issue.metrics.present?
end
end
def issue_first_added_to_list_label_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_added_to_board_at if issue.metrics.present?
end
end
def issue_first_mentioned_in_commit_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_mentioned_in_commit_at if issue.metrics.present?
end
end
def merge_request_merged_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.merged_at if merge_request.metrics.present?
end
end
def merge_request_build_started_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.latest_build_started_at if merge_request.metrics.present?
end
end
def merge_request_build_finished_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.latest_build_finished_at if merge_request.metrics.present?
end
end
def merge_request_deployed_to_production_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.first_deployed_to_production_at if merge_request.metrics.present?
end
end
private
def issues_query(created_after:)
@project.issues.where("created_at >= ?", created_after)
end
end
end
......@@ -42,4 +42,35 @@ class Deployment < ActiveRecord::Base
project.repository.is_ancestor?(commit.id, sha)
end
def update_merge_request_metrics
if environment.update_merge_request_metrics?
query = project.merge_requests.
joins(:metrics).
where(target_branch: self.ref, merge_request_metrics: { first_deployed_to_production_at: nil })
merge_requests =
if previous_deployment
query.where("merge_request_metrics.merged_at <= ? AND merge_request_metrics.merged_at >= ?",
self.created_at,
previous_deployment.created_at)
else
query.where("merge_request_metrics.merged_at <= ?", self.created_at)
end
# Need to use `map` instead of `select` because MySQL doesn't allow `SELECT`ing from the same table
# that we're updating.
MergeRequest::Metrics.where(merge_request_id: merge_requests.map(&:id), first_deployed_to_production_at: nil).
update_all(first_deployed_to_production_at: self.created_at)
end
end
def previous_deployment
@previous_deployment ||=
project.deployments.joins(:environment).
where(environments: { name: self.environment.name }, ref: self.ref).
where.not(id: self.id).
take
end
end
......@@ -43,4 +43,8 @@ class Environment < ActiveRecord::Base
last_deployment.includes_commit?(commit)
end
def update_merge_request_metrics?
self.name == "production"
end
end
......@@ -23,9 +23,9 @@ class Issue < ActiveRecord::Base
has_many :events, as: :target, dependent: :destroy
has_one :metrics, dependent: :destroy
has_one :metrics
has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues
has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues'
has_many :closed_by_merge_requests, through: :merge_requests_closing_issues, source: :merge_request
validates :project, presence: true
......@@ -202,7 +202,7 @@ class Issue < ActiveRecord::Base
# From all notes on this issue, we'll select the system notes about linked
# merge requests. Of those, the MRs closing `self` are returned.
def closed_by_merge_requests(current_user = nil)
return [] if !open?
return [] unless open?
ext = all_references(current_user)
......
......@@ -13,10 +13,6 @@ class Issue::Metrics < ActiveRecord::Base
self.save if self.changed?
end
def record_commit_mention!(commit_time)
self.update(first_mentioned_in_commit_at: commit_time) if self.first_mentioned_in_commit_at.blank?
end
private
def issue_assigned_to_list_label?
......
......@@ -13,11 +13,11 @@ class MergeRequest < ActiveRecord::Base
has_many :merge_request_diffs, dependent: :destroy
has_one :merge_request_diff,
-> { order('merge_request_diffs.id DESC') }
has_one :metrics, dependent: :destroy
has_one :metrics
has_many :events, as: :target, dependent: :destroy
has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues
has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues'
has_many :issues_closed, through: :merge_requests_closing_issues, source: :issue
serialize :merge_params, Hash
......@@ -513,7 +513,7 @@ class MergeRequest < ActiveRecord::Base
# running `ReferenceExtractor` on each of them separately.
def cache_merge_request_closes_issues!(current_user = self.author)
transaction do
self.merge_requests_closing_issues.destroy_all
self.merge_requests_closing_issues.delete_all
closes_issues(current_user).each do |issue|
MergeRequestsClosingIssues.create!(merge_request: self, issue: issue)
end
......
......@@ -8,16 +8,4 @@ class MergeRequest::Metrics < ActiveRecord::Base
self.save if self.changed?
end
def record_production_deploy!(deploy_time)
self.update(first_deployed_to_production_at: deploy_time) if self.first_deployed_to_production_at.blank?
end
def record_latest_build_start_time!(start_time)
self.update(latest_build_started_at: start_time, latest_build_finished_at: nil)
end
def record_latest_build_finish_time!(finish_time)
self.update(latest_build_finished_at: finish_time)
end
end
......@@ -13,39 +13,13 @@ class CreateDeploymentService < BaseService
deployable: deployable
)
update_merge_request_metrics(deployment, environment)
deployment.update_merge_request_metrics
deployment
end
private
def update_merge_request_metrics(deployment, environment)
if environment.name == "production"
query = project.merge_requests.joins("LEFT OUTER JOIN merge_request_metrics ON merge_request_metrics.merge_request_id = merge_requests.id").
where(target_branch: params[:ref], "merge_request_metrics.first_deployed_to_production_at" => nil)
previous_deployment = previous_deployment_for_ref(deployment)
merge_requests_deployed_to_production_for_first_time = if previous_deployment
query.where("merge_request_metrics.merged_at < ? AND merge_request_metrics.merged_at > ?", deployment.created_at, previous_deployment.created_at)
else
query.where("merge_request_metrics.merged_at < ?", deployment.created_at)
end
merge_requests_deployed_to_production_for_first_time.each { |merge_request| merge_request.metrics.record_production_deploy!(deployment.created_at) }
end
end
def previous_deployment_for_ref(current_deployment)
@previous_deployment_for_ref ||=
project.deployments.joins(:environment).
where("environments.name": params[:environment], ref: params[:ref]).
where.not(id: current_deployment.id).
first
end
private
def find_or_create_environment
project.environments.find_or_create_by(name: expanded_name) do |environment|
environment.external_url = expanded_url
......
......@@ -190,6 +190,8 @@ class GitPushService < BaseService
def update_issue_metrics(commit, authors)
mentioned_issues = commit.all_references(authors[commit]).issues
mentioned_issues.each { |issue| issue.metrics.record_commit_mention!(commit.committed_date) if issue.metrics.present? }
Issue::Metrics.where(issue_id: mentioned_issues.map(&:id), first_mentioned_in_commit_at: nil).
update_all(first_mentioned_in_commit_at: commit.committed_date)
end
end
......@@ -4,7 +4,7 @@ require './spec/support/test_env'
class Gitlab::Seeder::CycleAnalytics
def initialize(project, perf: false)
@project = project
@user = User.find(1)
@user = User.order(:id).last
@issue_count = perf ? 1000 : 5
stub_git_pre_receive!
end
......
......@@ -5,12 +5,12 @@ class AddTableIssueMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
DOWNTIME = true
# When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the
# migration requires downtime.
# DOWNTIME_REASON = ''
DOWNTIME_REASON = 'Adding foreign key'
# When using the methods "add_concurrent_index" or "add_column_with_default"
# you must disable the use of transactions as these methods can not run in an
......@@ -25,7 +25,7 @@ class AddTableIssueMetrics < ActiveRecord::Migration
def change
create_table :issue_metrics do |t|
t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, null: false
t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, dependent: :delete, null: false
t.datetime 'first_mentioned_in_commit_at'
t.datetime 'first_associated_with_milestone_at'
......
......@@ -5,12 +5,12 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
DOWNTIME = true
# When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the
# migration requires downtime.
# DOWNTIME_REASON = ''
DOWNTIME_REASON = 'Adding foreign key'
# When using the methods "add_concurrent_index" or "add_column_with_default"
# you must disable the use of transactions as these methods can not run in an
......@@ -25,7 +25,7 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration
def change
create_table :merge_request_metrics do |t|
t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, null: false
t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, dependent: :delete, null: false
t.datetime 'latest_build_started_at'
t.datetime 'latest_build_finished_at'
......
......@@ -25,8 +25,8 @@ class CreateMergeRequestsClosingIssues < ActiveRecord::Migration
def change
create_table :merge_requests_closing_issues do |t|
t.references :merge_request, foreign_key: true, index: true, null: false
t.references :issue, foreign_key: true, index: true, null: false
t.references :merge_request, foreign_key: true, index: true, dependent: :delete, null: false
t.references :issue, foreign_key: true, index: true, dependent: :delete, null: false
t.timestamps null: false
end
......
# https://www.periscopedata.com/blog/medians-in-sql.html
module Gitlab
module Database
module Median
def median_datetime(arel_table, query_so_far, column_sym)
case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL'
pg_median_datetime(arel_table, query_so_far, column_sym)
when 'Mysql2'
mysql_median_datetime(arel_table, query_so_far, column_sym)
else
raise NotImplementedError, "We haven't implemented a database median strategy for your database type."
end
end
def mysql_median_datetime(arel_table, query_so_far, column_sym)
query = arel_table.
from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
project(average([arel_table[column_sym]], 'median')).
where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"),
Arel::Nodes::And.new([Arel.sql('@ct/2.0'),
Arel.sql('@ct/2.0 + 1')]))).
# Disallow negative values
where(arel_table[column_sym].gteq(0))
[Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
Arel.sql("set @row_id := 0;"),
query,
Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")]
end
def pg_median_datetime(arel_table, query_so_far, column_sym)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
# column, the CTE might look like this:
#
# star_count | row_id | ct
# ------------+--------+----
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(cte_table,
arel_table.
project(arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
arel_table.project("COUNT(1)").as('ct')).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")).
where(Arel::Nodes::Between.new(cte_table[:row_id],
Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]))).
with(query_so_far, cte)
end
private
def average(args, as)
Arel::Nodes::NamedFunction.new("AVG", args, as)
end
def extract_epoch(arel_attribute)
Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
end
# Need to cast '0' to an INTERVAL before we can check if the interval is positive
def zero_interval
Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment