Commit b6ea41d1 authored by Michael Kozono

Find and store unhashed upload file paths

parent ab814e4d
module Gitlab module Gitlab
module BackgroundMigration module BackgroundMigration
class PrepareUnhashedUploads class PrepareUnhashedUploads
FILE_PATH_BATCH_SIZE = 500
UPLOAD_DIR = "#{CarrierWave.root}/uploads"
class UnhashedUploadFile < ActiveRecord::Base class UnhashedUploadFile < ActiveRecord::Base
self.table_name = 'unhashed_upload_files' self.table_name = 'unhashed_upload_files'
end end
...@@ -8,8 +11,8 @@ module Gitlab ...@@ -8,8 +11,8 @@ module Gitlab
def perform def perform
return unless migrate? return unless migrate?
clear_unhashed_upload_files clear_unhashed_upload_file_paths
store_unhashed_upload_files store_unhashed_upload_file_paths
schedule_populate_untracked_uploads_jobs schedule_populate_untracked_uploads_jobs
end end
...@@ -19,12 +22,55 @@ module Gitlab ...@@ -19,12 +22,55 @@ module Gitlab
UnhashedUploadFile.table_exists? UnhashedUploadFile.table_exists?
end end
def clear_unhashed_upload_files def clear_unhashed_upload_file_paths
# TODO UnhashedUploadFile.delete_all
end end
def store_unhashed_upload_files def store_unhashed_upload_file_paths
# TODO return unless Dir.exists?(UPLOAD_DIR)
file_paths = []
each_file_path(UPLOAD_DIR) do |file_path|
file_paths << file_path
if file_paths.size >= FILE_PATH_BATCH_SIZE
insert_file_paths(file_paths)
file_paths = []
end
end
insert_file_paths(file_paths) if file_paths.any?
end
# Runs the command produced by build_find_command for +search_dir+ and hands
# each path it prints to the caller's block.
#
# search_dir - directory passed through to build_find_command.
#
# Yields one String per NUL-terminated record read from the command's stdout,
# with the trailing NUL removed.
# Raises RuntimeError ("Find command failed") if the command exits unsuccessfully.
def each_file_path(search_dir, &block)
  find_command = build_find_command(search_dir)

  Open3.popen2(*find_command) do |_stdin, find_output, wait_thread|
    # Records are split on "\0" rather than newlines, so a path containing a
    # newline still arrives as a single record.
    find_output.each_line("\0") { |record| yield(record.chomp("\0")) }

    # The exit status is only inspected after stdout has been fully consumed.
    exit_status = wait_thread.value
    raise "Find command failed" unless exit_status.success?
  end
end
# Builds the argv array used to list regular files under +search_dir+.
#
# The find invocation skips anything matching "#{UPLOAD_DIR}/@hashed/*" or
# "#{UPLOAD_DIR}/tmp/*" and prints matches NUL-terminated (-print0).
#
# search_dir - directory that find should walk.
#
# Returns an Array of Strings. When ionice_is_available? is truthy the command
# is prefixed with `ionice -c Idle` (presumably so the scan runs at idle I/O
# priority — confirm against the ionice manual for the target platform).
def build_find_command(search_dir)
  cmd = ['find', search_dir, '-type', 'f', '!', '-path', "#{UPLOAD_DIR}/@hashed/*", '!', '-path', "#{UPLOAD_DIR}/tmp/*", '-print0']

  # The prefixed array must be assigned back: previously the result of this
  # expression was discarded, so the ionice prefix was never actually used.
  cmd = ['ionice', '-c', 'Idle'] + cmd if ionice_is_available?

  cmd
end
# Looks up the `ionice` executable through Gitlab::Utils.which.
#
# Returns whatever Gitlab::Utils.which('ionice') returns, or false when that
# lookup raises a StandardError.
def ionice_is_available?
  begin
    Gitlab::Utils.which('ionice')
  rescue StandardError
    # Swallowing the error is not pretty, but reporting "not available" is a
    # relatively safe fallback here.
    false
  end
end
def insert_file_paths(file_paths)
file_paths.each do |file_path|
UnhashedUploadFile.create!(path: file_path)
end
end end
def schedule_populate_untracked_uploads_jobs def schedule_populate_untracked_uploads_jobs
......
require 'spec_helper'
# Specs for PrepareUnhashedUploads#perform: which files on disk end up as rows
# in the unhashed_upload_files table.
describe Gitlab::BackgroundMigration::PrepareUnhashedUploads, :migration, schema: 20171103140253 do
  let!(:unhashed_upload_files) { table(:unhashed_upload_files) }

  let(:user1) { create(:user) }
  let(:user2) { create(:user) }
  let(:project1) { create(:project) }
  let(:project2) { create(:project) }
  let(:appearance) { create(:appearance) }

  context 'when files were uploaded before and after hashed storage was enabled' do
    before do
      fixture = Rails.root.join('spec', 'fixtures', 'rails_sample.jpg')
      uploaded_file = fixture_file_upload(fixture)

      # Uploads made before hashed storage is switched on. The examples below
      # expect these to account for 5 rows (presumably one per attachment:
      # two avatars, two appearance logos and one Markdown upload).
      user1.update(avatar: uploaded_file)
      project1.update(avatar: uploaded_file)
      appearance.update(logo: uploaded_file, header_logo: uploaded_file)
      uploaded_file = fixture_file_upload(fixture)
      UploadService.new(project1, uploaded_file, FileUploader).execute # Markdown upload

      stub_application_setting(hashed_storage_enabled: true)

      # Hashed files
      uploaded_file = fixture_file_upload(fixture)
      UploadService.new(project2, uploaded_file, FileUploader).execute
    end

    it 'adds unhashed files to the unhashed_upload_files table' do
      expect do
        described_class.new.perform
      end.to change { unhashed_upload_files.count }.from(0).to(5)
    end

    it 'does not add hashed files to the unhashed_upload_files table' do
      described_class.new.perform

      hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path

      # Bind the pattern instead of interpolating it into the SQL string, so a
      # quote in the path cannot break the query.
      matching_rows = unhashed_upload_files.where('path like ?', "%#{hashed_file_path}%")
      expect(matching_rows.exists?).to be_falsey
    end

    # E.g. from a previous failed run of this background migration
    context 'when there is existing data in unhashed_upload_files' do
      before do
        unhashed_upload_files.create(path: '/foo/bar.jpg')
      end

      it 'clears existing data before adding new data' do
        expect do
          described_class.new.perform
        end.to change { unhashed_upload_files.count }.from(1).to(5)
      end
    end

    # E.g. The installation is in use at the time of migration, and someone has
    # just uploaded a file
    context 'when there are files in /uploads/tmp' do
      let(:tmp_file) { Rails.root.join(described_class::UPLOAD_DIR, 'tmp', 'some_file.jpg') }

      before do
        # The tmp directory is not guaranteed to exist; FileUtils.touch raises
        # Errno::ENOENT when the parent directory is missing.
        FileUtils.mkdir_p(File.dirname(tmp_file))
        FileUtils.touch(tmp_file)
      end

      after do
        # Remove the stray file so it cannot leak into other examples.
        FileUtils.rm_f(tmp_file)
      end

      it 'does not add files from /uploads/tmp' do
        expect do
          described_class.new.perform
        end.to change { unhashed_upload_files.count }.from(0).to(5)
      end
    end
  end

  # Very new or lightly-used installations that are running this migration
  # may not have an upload directory because they have no uploads.
  context 'when no files were ever uploaded' do
    it 'does not add to the unhashed_upload_files table (and does not raise error)' do
      expect do
        described_class.new.perform
      end.not_to change { unhashed_upload_files.count }.from(0)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment