Commit 271e7a32 authored by Greg Stark's avatar Greg Stark

Add indexes and change SQL for expired artifacts to deal with artifacts migration efficiently

Artifacts are in the middle of being migrated from ci_builds to
ci_job_artifacts. The expiration date is currently visible in both of
these tables and the test for whether an expired artifact is present
for a job is complex as it requires checking both the of the tables.

Add two new indexes, one on ci_builds.artifacts_expire_at and one on
ci_job_artifacts.expire_at to enable finding expired artifacts
efficiently.

And until the migration is finished, replace the SQL for finding
expired and non-expired artifacts with a hand-crafted UNION ALL based
query instead of using OR. This overcomes a database optimizer
limitation that prevents it from using these indexes.

When the migration is finished the next version should remove this
query and replace it with a much simpler query on just
ci_job_artifacts. See
https://gitlab.com/gitlab-org/gitlab-ce/issues/42561 for followup.
parent dbb934c8
...@@ -41,12 +41,44 @@ module Ci ...@@ -41,12 +41,44 @@ module Ci
scope :unstarted, ->() { where(runner_id: nil) } scope :unstarted, ->() { where(runner_id: nil) }
scope :ignore_failures, ->() { where(allow_failure: false) } scope :ignore_failures, ->() { where(allow_failure: false) }
# This convoluted mess is because we need to handle two cases of
# artifact files during the migration. And a simple OR clause
# makes it impossible to optimize.
# Instead we want to use UNION ALL and do two carefully
# constructed disjoint queries. But Rails cannot handle UNION or
# UNION ALL queries so we do the query in a subquery and wrap it
# in an otherwise redundant WHERE IN query (IN is fine for
# non-null columns).
# This should all be ripped out when the migration is finished and
# replaced with just the new storage to avoid the extra work.
scope :with_artifacts, ->() do scope :with_artifacts, ->() do
where('(artifacts_file IS NOT NULL AND artifacts_file <> ?) OR EXISTS (?)', old = where(%q[artifacts_file <> ''])
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id')) new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id'))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end end
scope :with_artifacts_not_expired, ->() { with_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts.where('artifacts_expire_at < ?', Time.now) } scope :with_artifacts_not_expired, ->() do
old = where(%q[artifacts_file <> '' AND (artifacts_expire_at IS NULL OR artifacts_expire_at > ?)], Time.now)
new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id AND (expire_at IS NULL OR expire_at > ?)', Time.now))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end
scope :with_expired_artifacts, ->() do
old = where(%q[artifacts_file <> '' AND artifacts_expire_at < ?], Time.now)
new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id AND expire_at < ?', Time.now))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) } scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) } scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) } scope :ref_protected, -> { where(protected: true) }
......
---
title: Change SQL for expired artifacts to use new ci_job_artifacts.expire_at
merge_request: 16578
author:
type: performance
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class OptimizeCiJobArtifacts < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def up
# job_id is just here to be a covering index for index only scans
# since we'll almost always be joining against ci_builds on job_id
add_concurrent_index(:ci_job_artifacts, [:expire_at, :job_id])
add_concurrent_index(:ci_builds, [:artifacts_expire_at], where: "artifacts_file <> ''")
end
def down
remove_concurrent_index(:ci_job_artifacts, [:expire_at, :job_id])
remove_concurrent_index(:ci_builds, [:artifacts_expire_at], where: "artifacts_file <> ''")
end
end
...@@ -293,6 +293,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do ...@@ -293,6 +293,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do
t.integer "failure_reason" t.integer "failure_reason"
end end
add_index "ci_builds", ["artifacts_expire_at"], name: "index_ci_builds_on_artifacts_expire_at", where: "(artifacts_file <> ''::text)", using: :btree
add_index "ci_builds", ["auto_canceled_by_id"], name: "index_ci_builds_on_auto_canceled_by_id", using: :btree add_index "ci_builds", ["auto_canceled_by_id"], name: "index_ci_builds_on_auto_canceled_by_id", using: :btree
add_index "ci_builds", ["commit_id", "stage_idx", "created_at"], name: "index_ci_builds_on_commit_id_and_stage_idx_and_created_at", using: :btree add_index "ci_builds", ["commit_id", "stage_idx", "created_at"], name: "index_ci_builds_on_commit_id_and_stage_idx_and_created_at", using: :btree
add_index "ci_builds", ["commit_id", "status", "type"], name: "index_ci_builds_on_commit_id_and_status_and_type", using: :btree add_index "ci_builds", ["commit_id", "status", "type"], name: "index_ci_builds_on_commit_id_and_status_and_type", using: :btree
...@@ -333,6 +334,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do ...@@ -333,6 +334,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do
t.string "file" t.string "file"
end end
add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree
add_index "ci_job_artifacts", ["job_id", "file_type"], name: "index_ci_job_artifacts_on_job_id_and_file_type", unique: true, using: :btree add_index "ci_job_artifacts", ["job_id", "file_type"], name: "index_ci_job_artifacts_on_job_id_and_file_type", unique: true, using: :btree
add_index "ci_job_artifacts", ["project_id"], name: "index_ci_job_artifacts_on_project_id", using: :btree add_index "ci_job_artifacts", ["project_id"], name: "index_ci_job_artifacts_on_project_id", using: :btree
......
...@@ -180,8 +180,8 @@ FactoryBot.define do ...@@ -180,8 +180,8 @@ FactoryBot.define do
trait :artifacts do trait :artifacts do
after(:create) do |build| after(:create) do |build|
create(:ci_job_artifact, :archive, job: build) create(:ci_job_artifact, :archive, job: build, expire_at: build.artifacts_expire_at)
create(:ci_job_artifact, :metadata, job: build) create(:ci_job_artifact, :metadata, job: build, expire_at: build.artifacts_expire_at)
build.reload build.reload
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment