Commit 2ad17865 authored by Patrick Bajao

Merge branch '335291-store-hash-for-vsa-stages' into 'master'

Store hashcode for VSA stage events

See merge request gitlab-org/gitlab!67259
parents b6fffef8 760ab23c
# frozen_string_literal: true
module Analytics
  module CycleAnalytics
    # One row per distinct stage-event hash (`hash_sha256`). Project stages
    # point at these rows through their `stage_event_hash_id` column.
    class StageEventHash < ApplicationRecord
      has_many :cycle_analytics_project_stages,
               class_name: 'Analytics::CycleAnalytics::ProjectStage',
               inverse_of: :stage_event_hash

      validates :hash_sha256, presence: true

      # Looks up the id of the row storing +hash+, inserting that row in the
      # same statement when it is not there yet.
      #
      # @param hash [String] value for the hash_sha256 column
      # @return the `id` column of the first row returned by the query
      def self.record_id_by_hash_sha256(hash)
        hash_column = Analytics::CycleAnalytics::StageEventHash.arel_table[:hash_sha256]
        casted_hash_code = Arel::Nodes.build_quoted(hash, hash_column).to_sql

        # The insert (skipped on conflict) and the lookup are sent as a single
        # statement, so there is no application-level retry loop here.
        upsert_sql = <<~SQL
          WITH insert_cte AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
          INSERT INTO #{quoted_table_name} (hash_sha256) VALUES (#{casted_hash_code}) ON CONFLICT DO NOTHING RETURNING ID
          )
          SELECT ids.id FROM (
          (SELECT id FROM #{quoted_table_name} WHERE hash_sha256=#{casted_hash_code} LIMIT 1)
          UNION ALL
          (SELECT id FROM insert_cte LIMIT 1)
          ) AS ids LIMIT 1
        SQL

        rows = connection.execute(upsert_sql)
        rows.first['id']
      end

      # Deletes the row with the given id, restricted to the relation returned
      # by .unused_hashes_for (i.e. only when nothing references that id).
      def self.cleanup_if_unused(id)
        unreferenced = unused_hashes_for(id)
        unreferenced.where(id: id).delete_all
      end

      # Builds a relation that matches rows only when no project stage has
      # stage_event_hash_id equal to +id+. The relation does not filter on the
      # hash row's own id; callers add that condition themselves.
      def self.unused_hashes_for(id)
        referencing_stage = Analytics::CycleAnalytics::ProjectStage
          .where(stage_event_hash_id: id)
          .select('1')
          .limit(1)

        where.not('EXISTS (?)', referencing_stage)
      end
    end
  end
end

# Hook for the EE::Analytics::CycleAnalytics::StageEventHash extension module.
Analytics::CycleAnalytics::StageEventHash.prepend_mod_with('Analytics::CycleAnalytics::StageEventHash')
...@@ -10,6 +10,7 @@ module Analytics ...@@ -10,6 +10,7 @@ module Analytics
included do included do
belongs_to :start_event_label, class_name: 'GroupLabel', optional: true belongs_to :start_event_label, class_name: 'GroupLabel', optional: true
belongs_to :end_event_label, class_name: 'GroupLabel', optional: true belongs_to :end_event_label, class_name: 'GroupLabel', optional: true
belongs_to :stage_event_hash, class_name: 'Analytics::CycleAnalytics::StageEventHash', foreign_key: :stage_event_hash_id, optional: true
validates :name, presence: true validates :name, presence: true
validates :name, exclusion: { in: Gitlab::Analytics::CycleAnalytics::DefaultStages.names }, if: :custom? validates :name, exclusion: { in: Gitlab::Analytics::CycleAnalytics::DefaultStages.names }, if: :custom?
...@@ -28,6 +29,9 @@ module Analytics ...@@ -28,6 +29,9 @@ module Analytics
scope :ordered, -> { order(:relative_position, :id) } scope :ordered, -> { order(:relative_position, :id) }
scope :for_list, -> { includes(:start_event_label, :end_event_label).ordered } scope :for_list, -> { includes(:start_event_label, :end_event_label).ordered }
scope :by_value_stream, -> (value_stream) { where(value_stream_id: value_stream.id) } scope :by_value_stream, -> (value_stream) { where(value_stream_id: value_stream.id) }
before_save :ensure_stage_event_hash_id
after_commit :cleanup_old_stage_event_hash
end end
def parent=(_) def parent=(_)
...@@ -133,6 +137,20 @@ module Analytics ...@@ -133,6 +137,20 @@ module Analytics
.id_in(label_id) .id_in(label_id)
.exists? .exists?
end end
# before_save hook: points the stage at the hash record matching its current
# events_hash_code. Nothing is looked up when the associated record already
# stores that exact hash.
def ensure_stage_event_hash_id
  stored_hash = stage_event_hash&.hash_sha256
  return if stored_hash.present? && events_hash_code == stored_hash

  self.stage_event_hash_id = Analytics::CycleAnalytics::StageEventHash.record_id_by_hash_sha256(events_hash_code)
end
# after_commit hook: when the last save changed stage_event_hash_id, asks
# StageEventHash to remove the previously referenced record if it is unused.
def cleanup_old_stage_event_hash
  return unless stage_event_hash_id_previously_changed?

  former_hash_id = stage_event_hash_id_previously_was
  return unless former_hash_id

  Analytics::CycleAnalytics::StageEventHash.cleanup_if_unused(former_hash_id)
end
end end
end end
end end
# frozen_string_literal: true
# Creates the analytics_cycle_analytics_stage_event_hashes table: a binary
# hash_sha256 column covered by a unique index.
class CreateAnalyticsCycleAnalyticsStageEventHashes < ActiveRecord::Migration[6.1]
  UNIQUE_HASH_INDEX = 'index_cycle_analytics_stage_event_hashes_on_hash_sha_256'

  def change
    create_table :analytics_cycle_analytics_stage_event_hashes do |table|
      table.binary :hash_sha256
      table.index :hash_sha256, unique: true, name: UNIQUE_HASH_INDEX
    end
  end
end
# frozen_string_literal: true
# Adds a stage_event_hash_id column to the project stages table, indexes it,
# and links it to analytics_cycle_analytics_stage_event_hashes with a
# cascading foreign key.
class AddStageHashFkToProjectStages < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  # NOTE(review): presumably needed because the concurrent index/FK helpers
  # used in #up cannot run inside a DDL transaction — confirm against the
  # migration helper documentation.
  disable_ddl_transaction!

  STAGES_TABLE = :analytics_cycle_analytics_project_stages
  HASHES_TABLE = :analytics_cycle_analytics_stage_event_hashes
  HASH_ID_COLUMN = :stage_event_hash_id
  HASH_ID_INDEX = 'index_project_stages_on_stage_event_hash_id'

  def up
    # Skip the column when it is already present so the migration can be re-run.
    add_column STAGES_TABLE, HASH_ID_COLUMN, :bigint unless column_exists?(STAGES_TABLE, HASH_ID_COLUMN)

    add_concurrent_index STAGES_TABLE, HASH_ID_COLUMN, name: HASH_ID_INDEX
    add_concurrent_foreign_key STAGES_TABLE, HASHES_TABLE, column: HASH_ID_COLUMN, on_delete: :cascade
  end

  def down
    remove_column STAGES_TABLE, HASH_ID_COLUMN
  end
end
# frozen_string_literal: true
# Adds a stage_event_hash_id column to the group stages table, indexes it,
# and links it to analytics_cycle_analytics_stage_event_hashes with a
# cascading foreign key.
class AddStageHashFkToGroupStages < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  # NOTE(review): presumably needed because the concurrent index/FK helpers
  # used in #up cannot run inside a DDL transaction — confirm against the
  # migration helper documentation.
  disable_ddl_transaction!

  STAGES_TABLE = :analytics_cycle_analytics_group_stages
  HASHES_TABLE = :analytics_cycle_analytics_stage_event_hashes
  HASH_ID_COLUMN = :stage_event_hash_id
  HASH_ID_INDEX = 'index_group_stages_on_stage_event_hash_id'

  def up
    # Skip the column when it is already present so the migration can be re-run.
    add_column STAGES_TABLE, HASH_ID_COLUMN, :bigint unless column_exists?(STAGES_TABLE, HASH_ID_COLUMN)

    add_concurrent_index STAGES_TABLE, HASH_ID_COLUMN, name: HASH_ID_INDEX
    add_concurrent_foreign_key STAGES_TABLE, HASHES_TABLE, column: HASH_ID_COLUMN, on_delete: :cascade
  end

  def down
    remove_column STAGES_TABLE, HASH_ID_COLUMN
  end
end
f819eaed7e387f18f066180cbf9d0849b3e38db95bbf3e8487d3bc58d9b489ae
\ No newline at end of file
cb97b869bfb0b76dd0684aca1f40c86e7c1c9c9a0d52684830115288088e8066
\ No newline at end of file
5c104ffdb64943aa4828a9b961c8f9141dfd2ae861cea7116722d2b0d4598957
\ No newline at end of file
...@@ -9085,7 +9085,8 @@ CREATE TABLE analytics_cycle_analytics_group_stages ( ...@@ -9085,7 +9085,8 @@ CREATE TABLE analytics_cycle_analytics_group_stages (
hidden boolean DEFAULT false NOT NULL, hidden boolean DEFAULT false NOT NULL,
custom boolean DEFAULT true NOT NULL, custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL, name character varying(255) NOT NULL,
group_value_stream_id bigint NOT NULL group_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
); );
CREATE SEQUENCE analytics_cycle_analytics_group_stages_id_seq CREATE SEQUENCE analytics_cycle_analytics_group_stages_id_seq
...@@ -9128,7 +9129,8 @@ CREATE TABLE analytics_cycle_analytics_project_stages ( ...@@ -9128,7 +9129,8 @@ CREATE TABLE analytics_cycle_analytics_project_stages (
hidden boolean DEFAULT false NOT NULL, hidden boolean DEFAULT false NOT NULL,
custom boolean DEFAULT true NOT NULL, custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL, name character varying(255) NOT NULL,
project_value_stream_id bigint NOT NULL project_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
); );
CREATE SEQUENCE analytics_cycle_analytics_project_stages_id_seq CREATE SEQUENCE analytics_cycle_analytics_project_stages_id_seq
...@@ -9158,6 +9160,20 @@ CREATE SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq ...@@ -9158,6 +9160,20 @@ CREATE SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq
ALTER SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq OWNED BY analytics_cycle_analytics_project_value_streams.id; ALTER SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq OWNED BY analytics_cycle_analytics_project_value_streams.id;
CREATE TABLE analytics_cycle_analytics_stage_event_hashes (
id bigint NOT NULL,
hash_sha256 bytea
);
CREATE SEQUENCE analytics_cycle_analytics_stage_event_hashes_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE analytics_cycle_analytics_stage_event_hashes_id_seq OWNED BY analytics_cycle_analytics_stage_event_hashes.id;
CREATE TABLE analytics_devops_adoption_segments ( CREATE TABLE analytics_devops_adoption_segments (
id bigint NOT NULL, id bigint NOT NULL,
last_recorded_at timestamp with time zone, last_recorded_at timestamp with time zone,
...@@ -19925,6 +19941,8 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages ALTER COLUMN id SET DE ...@@ -19925,6 +19941,8 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages ALTER COLUMN id SET DE
ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams ALTER COLUMN id SET DEFAULT nextval('analytics_cycle_analytics_project_value_streams_id_seq'::regclass); ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams ALTER COLUMN id SET DEFAULT nextval('analytics_cycle_analytics_project_value_streams_id_seq'::regclass);
ALTER TABLE ONLY analytics_cycle_analytics_stage_event_hashes ALTER COLUMN id SET DEFAULT nextval('analytics_cycle_analytics_stage_event_hashes_id_seq'::regclass);
ALTER TABLE ONLY analytics_devops_adoption_segments ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_segments_id_seq'::regclass); ALTER TABLE ONLY analytics_devops_adoption_segments ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_segments_id_seq'::regclass);
ALTER TABLE ONLY analytics_devops_adoption_snapshots ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_snapshots_id_seq'::regclass); ALTER TABLE ONLY analytics_devops_adoption_snapshots ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_snapshots_id_seq'::regclass);
...@@ -21044,6 +21062,9 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages ...@@ -21044,6 +21062,9 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages
ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams
ADD CONSTRAINT analytics_cycle_analytics_project_value_streams_pkey PRIMARY KEY (id); ADD CONSTRAINT analytics_cycle_analytics_project_value_streams_pkey PRIMARY KEY (id);
ALTER TABLE ONLY analytics_cycle_analytics_stage_event_hashes
ADD CONSTRAINT analytics_cycle_analytics_stage_event_hashes_pkey PRIMARY KEY (id);
ALTER TABLE ONLY analytics_devops_adoption_segments ALTER TABLE ONLY analytics_devops_adoption_segments
ADD CONSTRAINT analytics_devops_adoption_segments_pkey PRIMARY KEY (id); ADD CONSTRAINT analytics_devops_adoption_segments_pkey PRIMARY KEY (id);
...@@ -23536,6 +23557,8 @@ CREATE INDEX index_custom_emoji_on_creator_id ON custom_emoji USING btree (creat ...@@ -23536,6 +23557,8 @@ CREATE INDEX index_custom_emoji_on_creator_id ON custom_emoji USING btree (creat
CREATE UNIQUE INDEX index_custom_emoji_on_namespace_id_and_name ON custom_emoji USING btree (namespace_id, name); CREATE UNIQUE INDEX index_custom_emoji_on_namespace_id_and_name ON custom_emoji USING btree (namespace_id, name);
CREATE UNIQUE INDEX index_cycle_analytics_stage_event_hashes_on_hash_sha_256 ON analytics_cycle_analytics_stage_event_hashes USING btree (hash_sha256);
CREATE UNIQUE INDEX index_daily_build_group_report_results_unique_columns ON ci_daily_build_group_report_results USING btree (project_id, ref_path, date, group_name); CREATE UNIQUE INDEX index_daily_build_group_report_results_unique_columns ON ci_daily_build_group_report_results USING btree (project_id, ref_path, date, group_name);
CREATE INDEX index_dast_profile_schedules_active_next_run_at ON dast_profile_schedules USING btree (active, next_run_at); CREATE INDEX index_dast_profile_schedules_active_next_run_at ON dast_profile_schedules USING btree (active, next_run_at);
...@@ -23960,6 +23983,8 @@ CREATE INDEX index_group_repository_storage_moves_on_group_id ON group_repositor ...@@ -23960,6 +23983,8 @@ CREATE INDEX index_group_repository_storage_moves_on_group_id ON group_repositor
CREATE UNIQUE INDEX index_group_stages_on_group_id_group_value_stream_id_and_name ON analytics_cycle_analytics_group_stages USING btree (group_id, group_value_stream_id, name); CREATE UNIQUE INDEX index_group_stages_on_group_id_group_value_stream_id_and_name ON analytics_cycle_analytics_group_stages USING btree (group_id, group_value_stream_id, name);
CREATE INDEX index_group_stages_on_stage_event_hash_id ON analytics_cycle_analytics_group_stages USING btree (stage_event_hash_id);
CREATE UNIQUE INDEX index_group_wiki_repositories_on_disk_path ON group_wiki_repositories USING btree (disk_path); CREATE UNIQUE INDEX index_group_wiki_repositories_on_disk_path ON group_wiki_repositories USING btree (disk_path);
CREATE INDEX index_group_wiki_repositories_on_shard_id ON group_wiki_repositories USING btree (shard_id); CREATE INDEX index_group_wiki_repositories_on_shard_id ON group_wiki_repositories USING btree (shard_id);
...@@ -24762,6 +24787,8 @@ CREATE INDEX index_project_settings_on_project_id_partially ON project_settings ...@@ -24762,6 +24787,8 @@ CREATE INDEX index_project_settings_on_project_id_partially ON project_settings
CREATE UNIQUE INDEX index_project_settings_on_push_rule_id ON project_settings USING btree (push_rule_id); CREATE UNIQUE INDEX index_project_settings_on_push_rule_id ON project_settings USING btree (push_rule_id);
CREATE INDEX index_project_stages_on_stage_event_hash_id ON analytics_cycle_analytics_project_stages USING btree (stage_event_hash_id);
CREATE INDEX index_project_statistics_on_namespace_id ON project_statistics USING btree (namespace_id); CREATE INDEX index_project_statistics_on_namespace_id ON project_statistics USING btree (namespace_id);
CREATE INDEX index_project_statistics_on_packages_size_and_project_id ON project_statistics USING btree (packages_size, project_id); CREATE INDEX index_project_statistics_on_packages_size_and_project_id ON project_statistics USING btree (packages_size, project_id);
...@@ -26073,6 +26100,9 @@ ALTER TABLE ONLY members ...@@ -26073,6 +26100,9 @@ ALTER TABLE ONLY members
ALTER TABLE ONLY lfs_objects_projects ALTER TABLE ONLY lfs_objects_projects
ADD CONSTRAINT fk_2eb33f7a78 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE NOT VALID; ADD CONSTRAINT fk_2eb33f7a78 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE NOT VALID;
ALTER TABLE ONLY analytics_cycle_analytics_group_stages
ADD CONSTRAINT fk_3078345d6d FOREIGN KEY (stage_event_hash_id) REFERENCES analytics_cycle_analytics_stage_event_hashes(id) ON DELETE CASCADE;
ALTER TABLE ONLY lists ALTER TABLE ONLY lists
ADD CONSTRAINT fk_30f2a831f4 FOREIGN KEY (iteration_id) REFERENCES sprints(id) ON DELETE CASCADE; ADD CONSTRAINT fk_30f2a831f4 FOREIGN KEY (iteration_id) REFERENCES sprints(id) ON DELETE CASCADE;
...@@ -26514,6 +26544,9 @@ ALTER TABLE ONLY packages_packages ...@@ -26514,6 +26544,9 @@ ALTER TABLE ONLY packages_packages
ALTER TABLE ONLY geo_event_log ALTER TABLE ONLY geo_event_log
ADD CONSTRAINT fk_c1f241c70d FOREIGN KEY (upload_deleted_event_id) REFERENCES geo_upload_deleted_events(id) ON DELETE CASCADE; ADD CONSTRAINT fk_c1f241c70d FOREIGN KEY (upload_deleted_event_id) REFERENCES geo_upload_deleted_events(id) ON DELETE CASCADE;
ALTER TABLE ONLY analytics_cycle_analytics_project_stages
ADD CONSTRAINT fk_c3339bdfc9 FOREIGN KEY (stage_event_hash_id) REFERENCES analytics_cycle_analytics_stage_event_hashes(id) ON DELETE CASCADE;
ALTER TABLE ONLY vulnerability_exports ALTER TABLE ONLY vulnerability_exports
ADD CONSTRAINT fk_c3d3cb5d0f FOREIGN KEY (group_id) REFERENCES namespaces(id) ON DELETE CASCADE; ADD CONSTRAINT fk_c3d3cb5d0f FOREIGN KEY (group_id) REFERENCES namespaces(id) ON DELETE CASCADE;
# frozen_string_literal: true
module EE
  module Analytics
    module CycleAnalytics
      # Extension meant to be prepended to the StageEventHash model: adds the
      # group stage association and makes the "unused" check also take group
      # stages into account.
      module StageEventHash
        extend ActiveSupport::Concern

        prepended do
          has_many :cycle_analytics_group_stages,
                   class_name: 'Analytics::CycleAnalytics::GroupStage',
                   inverse_of: :stage_event_hash
        end

        class_methods do
          # Narrows the relation returned by the overridden method so that it
          # matches only when no group stage has stage_event_hash_id equal to +id+.
          def unused_hashes_for(id)
            referencing_group_stage = ::Analytics::CycleAnalytics::GroupStage
              .where(stage_event_hash_id: id)
              .select('1')
              .limit(1)

            super.where.not('EXISTS (?)', referencing_group_stage)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# EE-side examples: group stage association and cleanup behaviour when a
# group stage references the hash record.
RSpec.describe Analytics::CycleAnalytics::StageEventHash, type: :model do
  let(:hash_sha256) { 'does_not_matter' }
  let(:stage_event_hash) { described_class.create!(hash_sha256: hash_sha256) }

  describe 'associations' do
    it { is_expected.to have_many(:cycle_analytics_group_stages) }
  end

  describe '.cleanup_if_unused' do
    it 'removes the record if there is no project or group stages with given stage events hash' do
      unreferenced_id = stage_event_hash.id

      described_class.cleanup_if_unused(unreferenced_id)

      expect(described_class.find_by_id(unreferenced_id)).to be_nil
    end

    it 'does not remove the record if at least 1 group stage for the given stage events hash exists' do
      group_stage = create(:cycle_analytics_group_stage)
      referenced_id = group_stage.stage_event_hash_id

      described_class.cleanup_if_unused(referenced_id)

      expect(described_class.find_by_id(referenced_id)).not_to be_nil
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Model examples: association, validation, insert-or-find by hash, and
# cleanup behaviour when a project stage references the hash record.
RSpec.describe Analytics::CycleAnalytics::StageEventHash, type: :model do
  let(:hash_sha256) { 'does_not_matter' }
  let(:stage_event_hash) { described_class.create!(hash_sha256: hash_sha256) }

  describe 'associations' do
    it { is_expected.to have_many(:cycle_analytics_project_stages) }
  end

  describe 'validations' do
    it { is_expected.to validate_presence_of(:hash_sha256) }
  end

  describe '.record_id_by_hash_sha256' do
    it 'returns an existing id' do
      existing_id = stage_event_hash.id

      found_id = described_class.record_id_by_hash_sha256(hash_sha256)

      expect(found_id).to eq(existing_id)
    end

    it 'creates a new record' do
      # stage_event_hash is lazy and never referenced here, so the table starts empty.
      record_lookup = -> { described_class.record_id_by_hash_sha256(hash_sha256) }

      expect(&record_lookup).to change { described_class.count }.from(0).to(1)
    end
  end

  describe '.cleanup_if_unused' do
    it 'removes the record' do
      unreferenced_id = stage_event_hash.id

      described_class.cleanup_if_unused(unreferenced_id)

      expect(described_class.find_by_id(unreferenced_id)).to be_nil
    end

    it 'does not remove the record' do
      project_stage = create(:cycle_analytics_project_stage)
      referenced_id = project_stage.stage_event_hash_id

      described_class.cleanup_if_unused(referenced_id)

      expect(described_class.find_by_id(referenced_id)).not_to be_nil
    end
  end
end
...@@ -13,6 +13,7 @@ RSpec.shared_examples 'value stream analytics stage' do ...@@ -13,6 +13,7 @@ RSpec.shared_examples 'value stream analytics stage' do
describe 'associations' do describe 'associations' do
it { is_expected.to belong_to(:end_event_label) } it { is_expected.to belong_to(:end_event_label) }
it { is_expected.to belong_to(:start_event_label) } it { is_expected.to belong_to(:start_event_label) }
it { is_expected.to belong_to(:stage_event_hash) }
end end
describe 'validation' do describe 'validation' do
...@@ -138,6 +139,67 @@ RSpec.shared_examples 'value stream analytics stage' do ...@@ -138,6 +139,67 @@ RSpec.shared_examples 'value stream analytics stage' do
expect(stage_1.events_hash_code).not_to eq(stage_2.events_hash_code) expect(stage_1.events_hash_code).not_to eq(stage_2.events_hash_code)
end end
end end
# rubocop: disable Rails/SaveBang
# Examples for the stage <-> stage event hash bookkeeping; `factory` is
# expected to be supplied by the including spec.
describe '#event_hash' do
  it 'associates the same stage event hash record' do
    stage_a = create(factory)
    stage_b = create(factory)

    expect(stage_a.stage_event_hash_id).to eq(stage_b.stage_event_hash_id)
  end

  it 'does not introduce duplicated stage event hash records' do
    create_two_stages = -> { 2.times { create(factory) } }

    expect(&create_two_stages).to change { Analytics::CycleAnalytics::StageEventHash.count }.from(0).to(1)
  end

  it 'creates different hash record for different event configurations' do
    create_distinct_stages = lambda do
      create(factory, start_event_identifier: :issue_created, end_event_identifier: :issue_first_mentioned_in_commit)
      create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)
    end

    expect(&create_distinct_stages).to change { Analytics::CycleAnalytics::StageEventHash.count }.from(0).to(2)
  end

  context 'when the stage event hash changes' do
    let(:stage) do
      create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)
    end

    it 'deletes the old, unused stage event hash record' do
      former_hash_id = stage.stage_event_hash.id

      stage.update!(end_event_identifier: :merge_request_first_deployed_to_production)

      expect(stage.stage_event_hash_id).not_to eq(former_hash_id)
      expect(Analytics::CycleAnalytics::StageEventHash.find_by_id(former_hash_id)).to be_nil
    end

    it 'does not delete used stage event hash record' do
      # Created before `stage` is first referenced, matching the original ordering.
      sibling_stage = create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)

      stage.update!(end_event_identifier: :merge_request_first_deployed_to_production)

      expect(stage.stage_event_hash_id).not_to eq(sibling_stage.stage_event_hash_id)

      still_used_hash = Analytics::CycleAnalytics::StageEventHash.find_by_id(sibling_stage.stage_event_hash_id)
      expect(still_used_hash).not_to be_nil
    end
  end

  context 'when the stage events hash code does not change' do
    it 'does not trigger extra query on save' do
      persisted_stage = create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)

      # The expectation is set after creation so only the update is observed.
      expect(Analytics::CycleAnalytics::StageEventHash).not_to receive(:record_id_by_hash_sha256)

      persisted_stage.update!(name: 'new title')
    end
  end
end
# rubocop: enable Rails/SaveBang
end end
RSpec.shared_examples 'value stream analytics label based stage' do RSpec.shared_examples 'value stream analytics label based stage' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment