Commit fd19f887 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch '50070-legacy-attachments' into 'master'

Migrate legacy uploads

Closes #57217

See merge request gitlab-org/gitlab-ce!24679
parents abb2d4c6 3335918b
---
title: Migrate legacy uploads out of deprecated paths
merge_request: 24679
author:
type: other
# frozen_string_literal: true
class MigrateLegacyAttachments < ActiveRecord::Migration[5.0]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
DOWNTIME = false
MIGRATION = 'MigrateLegacyUploads'.freeze
BATCH_SIZE = 5000
DELAY_INTERVAL = 5.minutes.to_i
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
include ::EachBatch
end
def up
Upload.where(uploader: 'AttachmentUploader').each_batch(of: BATCH_SIZE) do |relation, index|
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
delay = index * DELAY_INTERVAL
BackgroundMigrationWorker.perform_in(delay, MIGRATION, [start_id, end_id])
end
end
# not needed
def down
end
end
# Migrations problems
## Legacy upload migration
> Introduced in GitLab 12.0.
The migration takes all attachments uploaded by legacy `AttachmentUploader` and
migrate them to the path that current uploaders expect.
Although it should not usually happen there could possibly be some attachments belonging to
LegacyDiffNotes. These attachments can't be seen before running the migration by users and
they should not be present in your instance.
However, if you have some of them, you will need to handle them manually.
You can find the ids of failed notes in logs as "MigrateLegacyUploads: LegacyDiffNote"
1. Run a Rails console:
```sh
sudo gitlab-rails console production
```
or for source installs:
```sh
bundle exec rails console production
```
1. Check the failed upload and find the note (you can see their ids in the logs)
```ruby
upload = Upload.find(upload_id)
note = Note.find(note_id)
```
1. Check the path - it should contain `system/note/attachment`
```ruby
upload.absolut_path
```
1. Check the path in the uploader - it should differ from the upload path and should contain `system/legacy_diff_note`
```ruby
uploader = upload.build_uploader
uploader.file
```
1. First, you need to move the file to the path that is expected from the uploader
```ruby
old_path = upload.absolute_path
new_path = upload.absolute_path.sub('-/system/note/attachment', '-/system/legacy_diff_note')
new_dir = File.dirname(new_path)
FileUtils.mkdir_p(new_dir)
FileUtils.mv(old_path, new_path)
```
1. You then need to move the file to the `FileUploader` and create a new `Upload` object
```ruby
file_uploader = UploadService.new(note.project, File.read(new_path)).execute
```
1. And update the legacy note to contain the file.
```ruby
new_text = "#{note.note} \n #{file_uploader.markdown_link}"
note.update!(
note: new_text
)
```
1. And finally, you can remove the old upload
```ruby
upload.destroy
```
If you have any problems feel free to contact [GitLab Support](https://about.gitlab.com/support/).
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This migration takes all legacy uploads (that were uploaded using AttachmentUploader)
# and migrate them to the new (FileUploader) location (=under projects).
#
# We have dependencies (uploaders) in this migration because extracting code would add a lot of complexity
# and possible errors could appear as the logic in the uploaders is not trivial.
#
# This migration will be removed in 12.4 in order to get rid of a migration that depends on
# the application code.
class MigrateLegacyUploads
include Database::MigrationHelpers
include ::Gitlab::Utils::StrongMemoize
# This class takes a legacy upload and migrates it to the correct location
class UploadMover
include Gitlab::Utils::StrongMemoize
attr_reader :upload, :project, :note
def initialize(upload)
@upload = upload
@note = Note.find_by(id: upload.model_id)
@project = note&.project
end
def execute
return unless upload
if !project
# if we don't have models associated with the upload we can not move it
say "MigrateLegacyUploads: Deleting upload due to model not found: #{upload.inspect}"
destroy_legacy_upload
elsif note.is_a?(LegacyDiffNote)
handle_legacy_note_upload
elsif !legacy_file_exists?
# if we can not find the file we just remove the upload record
say "MigrateLegacyUploads: Deleting upload due to file not found: #{upload.inspect}"
destroy_legacy_upload
else
migrate_upload
end
end
private
def migrate_upload
return unless copy_upload_to_project
add_upload_link_to_note_text
destroy_legacy_file
destroy_legacy_upload
end
# we should proceed and log whenever one upload copy fails, no matter the reasons
# rubocop: disable Lint/RescueException
def copy_upload_to_project
@uploader = FileUploader.copy_to(legacy_file_uploader, project)
say "MigrateLegacyUploads: Copied file #{legacy_file_uploader.file.path} -> #{@uploader.file.path}"
true
rescue Exception => e
say "MigrateLegacyUploads: File #{legacy_file_uploader.file.path} couldn't be copied to project uploads. Error: #{e.message}"
false
end
# rubocop: enable Lint/RescueException
def destroy_legacy_upload
note.remove_attachment = true
note.save
if upload.destroy
say "MigrateLegacyUploads: Upload #{upload.inspect} was destroyed."
else
say "MigrateLegacyUploads: Upload #{upload.inspect} destroy failed."
end
end
def destroy_legacy_file
legacy_file_uploader.file.delete
end
def add_upload_link_to_note_text
new_text = "#{note.note} \n #{@uploader.markdown_link}"
note.update!(
note: new_text
)
end
def legacy_file_uploader
strong_memoize(:legacy_file_uploader) do
uploader = upload.build_uploader
uploader.retrieve_from_store!(File.basename(upload.path))
uploader
end
end
def legacy_file_exists?
legacy_file_uploader.file.exists?
end
def handle_legacy_note_upload
note.note += "\n \n Attachment ##{upload.id} with URL \"#{note.attachment.url}\" failed to migrate \
for model class #{note.class}. See #{help_doc_link}."
note.save
say "MigrateLegacyUploads: LegacyDiffNote ##{note.id} found, can't move the file: #{upload.inspect} for upload ##{upload.id}. See #{help_doc_link}."
end
def say(message)
Rails.logger.info(message)
end
def help_doc_link
'https://docs.gitlab.com/ee/administration/troubleshooting/migrations.html#legacy-upload-migration'
end
end
def perform(start_id, end_id)
Upload.where(id: start_id..end_id, uploader: 'AttachmentUploader').find_each do |upload|
UploadMover.new(upload).execute
end
end
end
end
end
......@@ -54,10 +54,7 @@ FactoryBot.define do
end
trait :attachment_upload do
transient do
mount_point :attachment
end
mount_point :attachment
model { build(:note) }
uploader "AttachmentUploader"
end
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::MigrateLegacyUploads, :migration, schema: 20190103140724 do
let(:test_dir) { FileUploader.options['storage_path'] }
# rubocop: disable RSpec/FactoriesInMigrationSpecs
let!(:namespace) { create(:namespace) }
let!(:project) { create(:project, :legacy_storage, namespace: namespace) }
let!(:issue) { create(:issue, project: project) }
let!(:note1) { create(:note, note: 'some note text awesome', project: project, noteable: issue) }
let!(:note2) { create(:note, note: 'some note', project: project, noteable: issue) }
let!(:hashed_project) { create(:project, namespace: namespace) }
let!(:issue_hashed_project) { create(:issue, project: hashed_project) }
let!(:note_hashed_project) { create(:note, note: 'some note', project: hashed_project, attachment: 'text.pdf', noteable: issue_hashed_project) }
let!(:standard_upload) do
create(:upload,
path: "secretabcde/image.png",
model_id: create(:project).id, model_type: 'Project', uploader: 'FileUploader')
end
def create_remote_upload(model, filename)
create(:upload, :attachment_upload,
path: "note/attachment/#{model.id}/#{filename}", secret: nil,
store: ObjectStorage::Store::REMOTE, model: model)
end
def create_upload(model, filename, with_file = true)
params = {
path: "uploads/-/system/note/attachment/#{model.id}/#{filename}",
model: model,
store: ObjectStorage::Store::LOCAL
}
upload = if with_file
create(:upload, :with_file, :attachment_upload, params)
else
create(:upload, :attachment_upload, params)
end
model.update(attachment: upload.build_uploader)
model.attachment.upload
end
let(:start_id) { 1 }
let(:end_id) { 10000 }
def new_upload_legacy
Upload.find_by(model_id: project.id, model_type: 'Project')
end
def new_upload_hashed
Upload.find_by(model_id: hashed_project.id, model_type: 'Project')
end
shared_examples 'migrates files correctly' do
before do
described_class.new.perform(start_id, end_id)
end
it 'removes all the legacy upload records' do
expect(Upload.where(uploader: 'AttachmentUploader')).to be_empty
expect(standard_upload.reload).to eq(standard_upload)
end
it 'creates new upload records correctly' do
expect(new_upload_legacy.secret).not_to be_nil
expect(new_upload_legacy.path).to end_with("#{new_upload_legacy.secret}/image.png")
expect(new_upload_legacy.model_id).to eq(project.id)
expect(new_upload_legacy.model_type).to eq('Project')
expect(new_upload_legacy.uploader).to eq('FileUploader')
expect(new_upload_hashed.secret).not_to be_nil
expect(new_upload_hashed.path).to end_with("#{new_upload_hashed.secret}/text.pdf")
expect(new_upload_hashed.model_id).to eq(hashed_project.id)
expect(new_upload_hashed.model_type).to eq('Project')
expect(new_upload_hashed.uploader).to eq('FileUploader')
end
it 'updates the legacy upload notes so that they include the file references in the markdown' do
expected_path = File.join('/uploads', new_upload_legacy.secret, 'image.png')
expected_markdown = "some note text awesome \n ![image](#{expected_path})"
expect(note1.reload.note).to eq(expected_markdown)
expected_path = File.join('/uploads', new_upload_hashed.secret, 'text.pdf')
expected_markdown = "some note \n [text.pdf](#{expected_path})"
expect(note_hashed_project.reload.note).to eq(expected_markdown)
end
it 'removed the attachments from the note model' do
expect(note1.reload.attachment.file).to be_nil
expect(note2.reload.attachment.file).to be_nil
expect(note_hashed_project.reload.attachment.file).to be_nil
end
end
context 'when legacy uploads are stored in local storage' do
let!(:legacy_upload1) { create_upload(note1, 'image.png') }
let!(:legacy_upload_not_found) { create_upload(note2, 'image.png', false) }
let!(:legacy_upload_hashed) { create_upload(note_hashed_project, 'text.pdf', with_file: true) }
shared_examples 'removes legacy local files' do
it 'removes all the legacy upload records' do
expect(File.exist?(legacy_upload1.absolute_path)).to be_truthy
expect(File.exist?(legacy_upload_hashed.absolute_path)).to be_truthy
described_class.new.perform(start_id, end_id)
expect(File.exist?(legacy_upload1.absolute_path)).to be_falsey
expect(File.exist?(legacy_upload_hashed.absolute_path)).to be_falsey
end
end
context 'when object storage is disabled for FileUploader' do
it_behaves_like 'migrates files correctly'
it_behaves_like 'removes legacy local files'
it 'moves legacy uploads to the correct location' do
described_class.new.perform(start_id, end_id)
expected_path1 = File.join(test_dir, 'uploads', namespace.path, project.path, new_upload_legacy.secret, 'image.png')
expected_path2 = File.join(test_dir, 'uploads', hashed_project.disk_path, new_upload_hashed.secret, 'text.pdf')
expect(File.exist?(expected_path1)).to be_truthy
expect(File.exist?(expected_path2)).to be_truthy
end
context 'when the upload move fails' do
it 'does not remove old uploads' do
expect(FileUploader).to receive(:copy_to).twice.and_raise('failed')
described_class.new.perform(start_id, end_id)
expect(legacy_upload1.reload).to eq(legacy_upload1)
expect(legacy_upload_hashed.reload).to eq(legacy_upload_hashed)
expect(standard_upload.reload).to eq(standard_upload)
end
end
end
context 'when object storage is enabled for FileUploader' do
before do
stub_uploads_object_storage(FileUploader)
end
it_behaves_like 'migrates files correctly'
it_behaves_like 'removes legacy local files'
# The process of migrating to object storage is a manual one,
# so it would go against expectations to automatically migrate these files
# to object storage during this migration.
# After this migration, these files should be able to successfully migrate to object storage.
it 'stores files locally' do
described_class.new.perform(start_id, end_id)
expected_path1 = File.join(test_dir, 'uploads', namespace.path, project.path, new_upload_legacy.secret, 'image.png')
expected_path2 = File.join(test_dir, 'uploads', hashed_project.disk_path, new_upload_hashed.secret, 'text.pdf')
expect(File.exist?(expected_path1)).to be_truthy
expect(File.exist?(expected_path2)).to be_truthy
end
end
context 'with legacy_diff_note upload' do
let!(:merge_request) { create(:merge_request, source_project: project) }
let!(:legacy_diff_note) { create(:legacy_diff_note_on_merge_request, note: 'some note', project: project, noteable: merge_request) }
let!(:legacy_upload_diff_note) do
create(:upload, :with_file, :attachment_upload,
path: "uploads/-/system/note/attachment/#{legacy_diff_note.id}/some_legacy.pdf", model: legacy_diff_note)
end
before do
described_class.new.perform(start_id, end_id)
end
it 'does not remove legacy diff note file' do
expect(File.exist?(legacy_upload_diff_note.absolute_path)).to be_truthy
end
it 'removes all the legacy upload records except for the one with legacy_diff_note' do
expect(Upload.where(uploader: 'AttachmentUploader')).to eq([legacy_upload_diff_note])
end
it 'adds link to the troubleshooting documentation to the note' do
help_doc_link = 'https://docs.gitlab.com/ee/administration/troubleshooting/migrations.html#legacy-upload-migration'
expect(legacy_diff_note.reload.note).to include(help_doc_link)
end
end
end
context 'when legacy uploads are stored in object storage' do
let!(:legacy_upload1) { create_remote_upload(note1, 'image.png') }
let!(:legacy_upload_not_found) { create_remote_upload(note2, 'non-existing.pdf') }
let!(:legacy_upload_hashed) { create_remote_upload(note_hashed_project, 'text.pdf') }
let(:remote_files) do
[
{ key: "#{legacy_upload1.path}" },
{ key: "#{legacy_upload_hashed.path}" }
]
end
let(:connection) { ::Fog::Storage.new(FileUploader.object_store_credentials) }
let(:bucket) { connection.directories.create(key: 'uploads') }
def create_remote_files
remote_files.each { |file| bucket.files.create(file) }
end
before do
stub_uploads_object_storage(FileUploader)
create_remote_files
end
it_behaves_like 'migrates files correctly'
it 'moves legacy uploads to the correct remote location' do
described_class.new.perform(start_id, end_id)
connection = ::Fog::Storage.new(FileUploader.object_store_credentials)
expect(connection.get_object('uploads', new_upload_legacy.path)[:status]).to eq(200)
expect(connection.get_object('uploads', new_upload_hashed.path)[:status]).to eq(200)
end
it 'removes all the legacy upload records' do
described_class.new.perform(start_id, end_id)
remote_files.each do |remote_file|
expect(bucket.files.get(remote_file[:key])).to be_nil
end
end
end
# rubocop: enable RSpec/FactoriesInMigrationSpecs
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment