Commit 5724eaff authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'add-rake-command-to-migrate-locally-persisted-archived-traces' into 'master'

Add rake command to migrate archived traces from local storage to object storage

Closes #50232

See merge request gitlab-org/gitlab-ce!21193
parents e6da699e 63091cfe
......@@ -67,6 +67,10 @@ module Ci
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive)
end
scope :with_archived_trace, ->() do
where('EXISTS (?)', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace)
end
scope :without_archived_trace, ->() do
where('NOT EXISTS (?)', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace)
end
......@@ -77,6 +81,7 @@ module Ci
end
scope :with_artifacts_stored_locally, -> { with_artifacts_archive.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) }
scope :with_archived_trace_stored_locally, -> { with_archived_trace.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) }
scope :with_artifacts_not_expired, ->() { with_artifacts_archive.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts_archive.where('artifacts_expire_at < ?', Time.now) }
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
......
---
title: Add rake command to migrate archived traces from local storage to object storage
merge_request: 21193
author:
type: added
......@@ -3,10 +3,6 @@
Job traces are sent by GitLab Runner while it's processing a job. You can see
traces in job pages, pipelines, email notifications, etc.
There isn't a way to automatically expire old job logs, but it's safe to remove
them if they're taking up too much space. If you remove the logs manually, the
job output in the UI will be empty.
## Data flow
In general, there are two states in job traces: "live trace" and "archived trace".
......@@ -57,11 +53,55 @@ To change the location where the job logs will be stored, follow the steps below
## Uploading traces to object storage
An archived trace is considered as a [job artifact](job_artifacts.md).
Therefore, when you [set up an object storage](job_artifacts.md#object-storage-settings),
Archived traces are considered as [job artifacts](job_artifacts.md).
Therefore, when you [set up the object storage integration](job_artifacts.md#object-storage-settings),
job traces are automatically migrated to it along with the other job artifacts.
See [Data flow](#data-flow) to learn about the process.
See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process.
## How to archive legacy job trace files
Legacy job traces, which were created before GitLab 10.5, were not archived regularly.
It's the same state with the "2: overwriting" in the above [Data flow](#data-flow).
To archive those legacy job traces, please follow the instruction below.
1. Execute the following command
```bash
gitlab-rake gitlab:traces:archive
```
After you executed this task, GitLab instance queues up Sidekiq jobs (asynchronous processes)
for migrating job trace files from local storage to object storage.
It could take time to complete the all migration jobs. You can check the progress by the following command
```bash
sudo gitlab-rails console
```
```bash
[1] pry(main)> Sidekiq::Stats.new.queues['pipeline_background:archive_trace']
=> 100
```
If the count becomes zero, the archiving processes are done
## How to migrate archived job traces to object storage
If job traces have already been archived into local storage, and you want to migrate those traces to object storage, please follow the instruction below.
1. Ensure [Object storage integration for Job Artifacts](job_artifacts.md#object-storage-settings) is enabled
1. Execute the following command
```bash
gitlab-rake gitlab:traces:migrate
```
## How to remove job traces
There isn't a way to automatically expire old job logs, but it's safe to remove
them if they're taking up too much space. If you remove the logs manually, the
job output in the UI will be empty.
## New live trace architecture
......
......@@ -18,5 +18,22 @@ namespace :gitlab do
logger.info("Scheduled #{job_ids.count} jobs. From #{job_ids.min} to #{job_ids.max}")
end
end
task migrate: :environment do
logger = Logger.new(STDOUT)
logger.info('Starting transfer of job traces')
Ci::Build.joins(:project)
.with_archived_trace_stored_locally
.find_each(batch_size: 10) do |build|
begin
build.job_artifacts_trace.file.migrate!(ObjectStorage::Store::REMOTE)
logger.info("Transferred job trace of #{build.id} to object storage")
rescue => e
logger.error("Failed to transfer artifacts of #{build.id} with error: #{e.message}")
end
end
end
end
end
......@@ -5,6 +5,7 @@ describe 'gitlab:traces rake tasks' do
Rake.application.rake_require 'tasks/gitlab/traces'
end
describe 'gitlab:traces:archive' do
shared_examples 'passes the job id to worker' do
it do
expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]])
......@@ -52,4 +53,61 @@ describe 'gitlab:traces rake tasks' do
it_behaves_like 'does not pass the job id to worker'
end
end
describe 'gitlab:traces:migrate' do
let(:object_storage_enabled) { false }
before do
stub_artifacts_object_storage(enabled: object_storage_enabled)
end
subject { run_rake_task('gitlab:traces:migrate') }
let!(:job_trace) { create(:ci_job_artifact, :trace, file_store: store) }
context 'when local storage is used' do
let(:store) { ObjectStorage::Store::LOCAL }
context 'and job does not have file store defined' do
let(:object_storage_enabled) { true }
let(:store) { nil }
it "migrates file to remote storage" do
subject
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
context 'and remote storage is defined' do
let(:object_storage_enabled) { true }
it "migrates file to remote storage" do
subject
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
context 'and remote storage is not defined' do
it "fails to migrate to remote storage" do
subject
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::LOCAL)
end
end
end
context 'when remote storage is used' do
let(:object_storage_enabled) { true }
let(:store) { ObjectStorage::Store::REMOTE }
it "file stays on remote storage" do
subject
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment