Commit 96747556 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'live-trace-v2-persist-data' into 'master'

Live trace: Rescue stale live trace

See merge request gitlab-org/gitlab-ce!18680
parents 75ed8a09 dfb0d45d
......@@ -66,6 +66,7 @@ module Ci
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) }
scope :with_live_trace, -> { where('EXISTS (?)', Ci::BuildTraceChunk.where('ci_builds.id = ci_build_trace_chunks.build_id').select(1)) }
scope :matches_tag_ids, -> (tag_ids) do
matcher = ::ActsAsTaggableOn::Tagging
......
......@@ -47,6 +47,6 @@ module ExclusiveLeaseGuard
end
def log_error(message, extra_args = {})
logger.error(message)
Rails.logger.error(message)
end
end
......@@ -17,6 +17,7 @@
- cronjob:stuck_ci_jobs
- cronjob:stuck_import_jobs
- cronjob:stuck_merge_jobs
- cronjob:ci_archive_traces_cron
- cronjob:trending_projects
- cronjob:issue_due_scheduler
......
module Ci
class ArchiveTracesCronWorker
include ApplicationWorker
include CronjobQueue
def perform
# Archive stale live traces which still resides in redis or database
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
begin
build.trace.archive!
rescue => e
failed_archive_counter.increment
Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
end
end
end
private
def failed_archive_counter
@failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of traces archiving")
end
end
end
---
title: Add a cronworker to rescue stale live traces
merge_request: 18680
author:
type: performance
......@@ -289,6 +289,9 @@ Settings.cron_jobs['repository_archive_cache_worker']['job_class'] = 'Repository
Settings.cron_jobs['import_export_project_cleanup_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['import_export_project_cleanup_worker']['cron'] ||= '0 * * * *'
Settings.cron_jobs['import_export_project_cleanup_worker']['job_class'] = 'ImportExportProjectCleanupWorker'
Settings.cron_jobs['ci_archive_traces_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_archive_traces_cron_worker']['cron'] ||= '17 * * * *'
Settings.cron_jobs['ci_archive_traces_cron_worker']['job_class'] = 'Ci::ArchiveTracesCronWorker'
Settings.cron_jobs['requests_profiles_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['requests_profiles_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['requests_profiles_worker']['job_class'] = 'RequestsProfilesWorker'
......
module Gitlab
module Ci
class Trace
include ExclusiveLeaseGuard
LEASE_TIMEOUT = 1.hour
ArchiveError = Class.new(StandardError)
attr_reader :job
......@@ -105,6 +109,14 @@ module Gitlab
end
def archive!
try_obtain_lease do
unsafe_archive!
end
end
private
def unsafe_archive!
raise ArchiveError, 'Already archived' if trace_artifact
raise ArchiveError, 'Job is not finished yet' unless job.complete?
......@@ -126,8 +138,6 @@ module Gitlab
end
end
private
def archive_stream!(stream)
clone_file!(stream, JobArtifactUploader.workhorse_upload_path) do |clone_path|
create_build_trace!(job, clone_path)
......@@ -206,6 +216,16 @@ module Gitlab
def trace_artifact
job.job_artifacts_trace
end
# For ExclusiveLeaseGuard concern
def lease_key
@lease_key ||= "trace:archive:#{job.id}"
end
# For ExclusiveLeaseGuard concern
def lease_timeout
LEASE_TIMEOUT
end
end
end
end
require 'spec_helper'
describe Gitlab::Ci::Trace, :clean_gitlab_redis_cache do
describe Gitlab::Ci::Trace, :clean_gitlab_redis_shared_state do
let(:build) { create(:ci_build) }
let(:trace) { described_class.new(build) }
......
......@@ -116,6 +116,26 @@ describe Ci::Build do
end
end
describe '.with_live_trace' do
subject { described_class.with_live_trace }
context 'when build has live trace' do
let!(:build) { create(:ci_build, :success, :trace_live) }
it 'selects the build' do
is_expected.to eq([build])
end
end
context 'when build does not have live trace' do
let!(:build) { create(:ci_build, :success, :trace_artifact) }
it 'does not select the build' do
is_expected.to be_empty
end
end
end
describe '#actionize' do
context 'when build is a created' do
before do
......
......@@ -227,6 +227,42 @@ shared_examples_for 'common trace features' do
end
end
end
describe '#archive!' do
subject { trace.archive! }
context 'when build status is success' do
let!(:build) { create(:ci_build, :success, :trace_live) }
it 'does not have an archived trace yet' do
expect(build.job_artifacts_trace).to be_nil
end
context 'when archives' do
it 'has an archived trace' do
subject
build.reload
expect(build.job_artifacts_trace).to be_exist
end
context 'when another process has already been archiving', :clean_gitlab_redis_shared_state do
before do
Gitlab::ExclusiveLease.new("trace:archive:#{trace.job.id}", timeout: 1.hour).try_obtain
end
it 'blocks concurrent archiving' do
expect(Rails.logger).to receive(:error).with('Cannot obtain an exclusive lease. There must be another instance already in execution.')
subject
build.reload
expect(build.job_artifacts_trace).to be_nil
end
end
end
end
end
end
shared_examples_for 'trace with disabled live trace feature' do
......
require 'spec_helper'
describe Ci::ArchiveTracesCronWorker do
subject { described_class.new.perform }
before do
stub_feature_flags(ci_enable_live_trace: true)
end
shared_examples_for 'archives trace' do
it do
subject
build.reload
expect(build.job_artifacts_trace).to be_exist
end
end
shared_examples_for 'does not archive trace' do
it do
subject
build.reload
expect(build.job_artifacts_trace).to be_nil
end
end
context 'when a job was succeeded' do
let!(:build) { create(:ci_build, :success, :trace_live) }
it_behaves_like 'archives trace'
context 'when archive raised an exception' do
let!(:build) { create(:ci_build, :success, :trace_artifact, :trace_live) }
let!(:build2) { create(:ci_build, :success, :trace_live) }
it 'archives valid targets' do
expect(Rails.logger).to receive(:error).with("Failed to archive stale live trace. id: #{build.id} message: Already archived")
subject
build2.reload
expect(build2.job_artifacts_trace).to be_exist
end
end
end
context 'when a job was cancelled' do
let!(:build) { create(:ci_build, :canceled, :trace_live) }
it_behaves_like 'archives trace'
end
context 'when a job is running' do
let!(:build) { create(:ci_build, :running, :trace_live) }
it_behaves_like 'does not archive trace'
end
end
......@@ -132,8 +132,10 @@ describe StuckCiJobsWorker do
end
it 'cancels exclusive lease after worker perform' do
expect(Gitlab::ExclusiveLease).to receive(:cancel).with(described_class::EXCLUSIVE_LEASE_KEY, exclusive_lease_uuid)
worker.perform
expect(Gitlab::ExclusiveLease.new(described_class::EXCLUSIVE_LEASE_KEY, timeout: 1.hour))
.not_to be_exists
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment