Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
978dbbd0
Commit
978dbbd0
authored
Dec 04, 2017
by
Michael Kozono
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port of mk-add-old-attachments-to-uploads-table to EE
parent
0fb58cf6
Changes
14
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1305 additions
and
14 deletions
+1305
-14
changelogs/unreleased/mk-add-old-attachments-to-uploads-table.yml
...gs/unreleased/mk-add-old-attachments-to-uploads-table.yml
+5
-0
db/migrate/20171103000000_set_uploads_path_size_for_mysql.rb
db/migrate/20171103000000_set_uploads_path_size_for_mysql.rb
+25
-0
db/post_migrate/20171103140253_track_untracked_uploads.rb
db/post_migrate/20171103140253_track_untracked_uploads.rb
+21
-0
db/schema.rb
db/schema.rb
+1
-1
lib/gitlab/background_migration/populate_untracked_uploads.rb
...gitlab/background_migration/populate_untracked_uploads.rb
+259
-0
lib/gitlab/background_migration/prepare_untracked_uploads.rb
lib/gitlab/background_migration/prepare_untracked_uploads.rb
+163
-0
lib/gitlab/database.rb
lib/gitlab/database.rb
+8
-2
lib/gitlab/utils.rb
lib/gitlab/utils.rb
+7
-7
spec/lib/gitlab/background_migration/populate_untracked_uploads_spec.rb
...b/background_migration/populate_untracked_uploads_spec.rb
+510
-0
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
...ab/background_migration/prepare_untracked_uploads_spec.rb
+242
-0
spec/lib/gitlab/database_spec.rb
spec/lib/gitlab/database_spec.rb
+16
-0
spec/lib/gitlab/utils_spec.rb
spec/lib/gitlab/utils_spec.rb
+1
-4
spec/migrations/track_untracked_uploads_spec.rb
spec/migrations/track_untracked_uploads_spec.rb
+27
-0
spec/support/track_untracked_uploads_helpers.rb
spec/support/track_untracked_uploads_helpers.rb
+20
-0
No files found.
changelogs/unreleased/mk-add-old-attachments-to-uploads-table.yml
0 → 100644
View file @
978dbbd0
---
title: Add untracked files to uploads table
merge_request: 15270
author:
type: other
db/migrate/20171103000000_set_uploads_path_size_for_mysql.rb
0 → 100644
View file @
978dbbd0
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Changes the `uploads.path` column to a string with an explicit limit of 511
# characters; `down` restores the unspecified (default) string length.
class SetUploadsPathSizeForMysql < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

  # We need at least 297 at the moment. For more detail on that number, see:
  # https://gitlab.com/gitlab-org/gitlab-ce/issues/40168#what-is-the-expected-correct-behavior
  #
  # Rails + PostgreSQL `string` is equivalent to a `text` field, but
  # Rails + MySQL `string` is `varchar(255)` by default. Also, note that we
  # have an upper limit because with a unique index, MySQL has a max key
  # length of 3072 bytes which seems to correspond to `varchar(1024)`.
  def up
    change_column(:uploads, :path, :string, limit: 511)
  end

  # The limit was unspecified before this migration, which is varchar(255)
  # by default in Rails for MySQL.
  def down
    change_column(:uploads, :path, :string)
  end
end
db/post_migrate/20171103140253_track_untracked_uploads.rb
0 → 100644
View file @
978dbbd0
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Post-deployment migration that enqueues the `PrepareUntrackedUploads`
# background migration. Rolling back drops the temporary
# `untracked_files_for_uploads` table if it is present.
class TrackUntrackedUploads < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  DOWNTIME = false
  MIGRATION = 'PrepareUntrackedUploads'

  # Enqueue the background migration named by MIGRATION (no arguments).
  def up
    BackgroundMigrationWorker.perform_async(MIGRATION)
  end

  # Remove the temporary tracking table, tolerating its absence.
  def down
    return unless table_exists?(:untracked_files_for_uploads)

    drop_table(:untracked_files_for_uploads)
  end
end
db/schema.rb
View file @
978dbbd0
...
@@ -2216,7 +2216,7 @@ ActiveRecord::Schema.define(version: 20171124165823) do
...
@@ -2216,7 +2216,7 @@ ActiveRecord::Schema.define(version: 20171124165823) do
create_table
"uploads"
,
force: :cascade
do
|
t
|
create_table
"uploads"
,
force: :cascade
do
|
t
|
t
.
integer
"size"
,
limit:
8
,
null:
false
t
.
integer
"size"
,
limit:
8
,
null:
false
t
.
string
"path"
,
null:
false
t
.
string
"path"
,
limit:
511
,
null:
false
t
.
string
"checksum"
,
limit:
64
t
.
string
"checksum"
,
limit:
64
t
.
integer
"model_id"
t
.
integer
"model_id"
t
.
string
"model_type"
t
.
string
"model_type"
...
...
lib/gitlab/background_migration/populate_untracked_uploads.rb
0 → 100644
View file @
978dbbd0
# frozen_string_literal: true
module
Gitlab
module
BackgroundMigration
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
class
PopulateUntrackedUploads
# rubocop:disable Metrics/ClassLength
# Wraps one row of `untracked_files_for_uploads` and derives, from its
# `path`, the attributes needed to track that file in the `uploads` table
# (see #to_h).
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
  self.table_name = 'untracked_files_for_uploads'

  # Ends with /:random_hex/:filename
  FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
  FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}}

  # These regex patterns are tested against a relative path, relative to
  # the upload directory.
  # For convenience, if there exists a capture group in the pattern, then
  # it indicates the model_id.
  # Order matters: the first matching entry wins (see #matching_pattern_map).
  PATH_PATTERNS = [
    { pattern: %r{\A-/system/appearance/logo/(\d+)/}, uploader: 'AttachmentUploader', model_type: 'Appearance' },
    { pattern: %r{\A-/system/appearance/header_logo/(\d+)/}, uploader: 'AttachmentUploader', model_type: 'Appearance' },
    { pattern: %r{\A-/system/note/attachment/(\d+)/}, uploader: 'AttachmentUploader', model_type: 'Note' },
    { pattern: %r{\A-/system/user/avatar/(\d+)/}, uploader: 'AvatarUploader', model_type: 'User' },
    { pattern: %r{\A-/system/group/avatar/(\d+)/}, uploader: 'AvatarUploader', model_type: 'Namespace' },
    { pattern: %r{\A-/system/project/avatar/(\d+)/}, uploader: 'AvatarUploader', model_type: 'Project' },
    { pattern: FILE_UPLOADER_PATH, uploader: 'FileUploader', model_type: 'Project' }
  ].freeze

  # Attribute hash for the `uploads` row describing this file. Memoized.
  # Building it reads the file on disk (size and checksum) and raises if
  # the path matches none of PATH_PATTERNS.
  def to_h
    return @upload_hash if @upload_hash

    @upload_hash = {
      path: upload_path,
      uploader: uploader,
      model_type: model_type,
      model_id: model_id,
      size: file_size,
      checksum: checksum
    }
  end

  # Value for `uploads.path`. For FileUploader files this is the trailing
  # ":random_hex/:filename" part (leading slash removed); for every other
  # uploader it is the stored `path` unchanged.
  def upload_path
    return @upload_path if @upload_path

    @upload_path =
      if uploader == 'FileUploader'
        # Path relative to project directory in uploads, without the leading slash
        path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)[0].sub(%r{\A/}, '')
      else
        path
      end
  end

  # Uploader class name taken from the matching PATH_PATTERNS entry.
  def uploader
    matching_pattern_map[:uploader]
  end

  # Model class name taken from the matching PATH_PATTERNS entry.
  def model_type
    matching_pattern_map[:model_type]
  end

  # ID of the owning model. Memoized with `defined?` because the result
  # may legitimately be nil (FileUploader path whose project is not found).
  def model_id
    return @model_id if defined?(@model_id)

    # If something is captured it is a model_id; only the FileUploader
    # pattern has no capture group.
    captured_id = path_relative_to_upload_dir.match(matching_pattern_map[:pattern])[1]
    @model_id = captured_id ? captured_id.to_i : file_uploader_model_id
  end

  # Size in bytes of the file on disk.
  def file_size
    File.size(absolute_path)
  end

  # SHA256 hex digest of the file on disk.
  def checksum
    Digest::SHA256.file(absolute_path).hexdigest
  end

  private

  # First PATH_PATTERNS entry whose pattern matches the relative path.
  # Raises a RuntimeError when nothing matches.
  def matching_pattern_map
    @matching_pattern_map ||= PATH_PATTERNS.find do |candidate|
      path_relative_to_upload_dir.match(candidate[:pattern])
    end

    @matching_pattern_map || raise("Unknown upload path pattern \"#{path}\"")
  end

  # Resolves the project ID for a FileUploader path by looking the project
  # up by the full_path that precedes "/:random_hex/:filename".
  # Returns nil when no such project exists; raises when the full_path
  # cannot be captured at all.
  def file_uploader_model_id
    captured = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)

    unless captured
      raise <<~MSG
        Could not capture project full_path from a FileUploader path:
          "#{path_relative_to_upload_dir}"
      MSG
    end

    Project.find_by_full_path(captured[1])&.id
  end

  # `path` with the leading "<RELATIVE_UPLOAD_DIR>/" removed.
  # Not including a leading slash. Memoized.
  def path_relative_to_upload_dir
    @path_relative_to_upload_dir ||= begin
      upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
      path.sub(%r{\A#{Regexp.escape(upload_dir)}/}, '')
    end
  end

  # Location of the file on disk: `path` joined onto CarrierWave.root.
  def absolute_path
    File.join(CarrierWave.root, path)
  end
end
# Bare ActiveRecord model bound to the `uploads` table; used here only to
# query that table.
class Upload < ActiveRecord::Base
  self.table_name = 'uploads'
end
# Processes the `untracked_files_for_uploads` rows whose IDs fall within
# start_id..end_id: inserts the missing `uploads` rows, deletes the rows
# that were processed, and drops the temporary table once it is empty.
# Does nothing when either table is missing.
def perform(start_id, end_id)
  return unless migrate?

  batch = UntrackedFile.where(id: start_id..end_id)
  insert_uploads_if_needed(batch).delete_all

  drop_temp_table_if_finished
end
private
# True only when both the temporary tracking table and the `uploads` table
# exist.
def migrate?
  [UntrackedFile, Upload].all?(&:table_exists?)
end
# Inserts an `uploads` row for every file in `files` that parses cleanly,
# is not already tracked and whose model still exists.
#
# Returns the relation of files considered processed: everything in
# `files` except those whose path could not be parsed (those stay in the
# table).
def insert_uploads_if_needed(files)
  parsable_files, error_files = filter_error_files(files)

  insertable_files = filter_deleted_models(filter_existing_uploads(parsable_files))
  insert(insertable_files)

  files.where.not(id: error_files.map(&:id))
end
# Splits `files` into [files whose upload attributes can be built,
# files for which building them raised].
def filter_error_files(files)
  files.partition { |file| upload_attributes_buildable?(file) }
end

# Tries to build the upload attribute hash for `file`. Returns true on
# success; on any StandardError logs the path, message and backtrace and
# returns false.
def upload_attributes_buildable?(file)
  file.to_h
  true
rescue => e
  msg = <<~MSG
    Error parsing path "#{file.path}":
      #{e.message}
      #{e.backtrace.join("\n  ")}
  MSG

  Rails.logger.error(msg)
  false
end
# Drops the files whose upload path is already present in the `uploads`
# table, using a single query for the whole batch.
def filter_existing_uploads(files)
  candidate_paths = files.map(&:upload_path)
  tracked_paths = Upload.where(path: candidate_paths).pluck(:path).to_set

  files.reject { |file| tracked_paths.include?(file.upload_path) }
end
# There are files on disk that are not in the uploads table because their
# model was deleted, and we don't delete the files on disk.
# Keeps only the files whose model_id is not among the deleted IDs for
# their model_type.
def filter_deleted_models(files)
  deleted_ids_by_type = deleted_model_ids(files)

  files.select do |file|
    !deleted_ids_by_type[file.model_type].include?(file.model_id)
  end
end
# Returns a Hash mapping each supported model type name to the model IDs
# referenced by `files` that no longer exist in that model's table.
#
# files - Enumerable of objects responding to `model_type` and `model_id`.
#
# The result always contains all five model type keys. A nil model_id
# (e.g. a FileUploader path whose project was not found) is never returned
# by the lookup, so it ends up in the "deleted" list for its type.
#
# Model types with no referenced IDs are skipped entirely: no constant
# lookup and no database query is made for them.
def deleted_model_ids(files)
  referenced_ids = {
    'Appearance' => [],
    'Namespace' => [],
    'Note' => [],
    'Project' => [],
    'User' => []
  }

  # group model IDs by model type
  files.each { |file| referenced_ids[file.model_type] << file.model_id }

  referenced_ids.each_with_object({}) do |(model_type, model_ids), deleted|
    if model_ids.empty?
      deleted[model_type] = []
      next
    end

    model_class = Object.const_get(model_type)
    found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
    deleted[model_type] = model_ids - found_ids
  end
end
# Bulk-inserts one `uploads` row per file. `created_at` is passed as the
# raw SQL expression NOW(), so quoting is disabled for that column only.
def insert(files)
  rows = files.map { |file| file.to_h.merge(created_at: 'NOW()') }

  Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at)
end
# Drops the temporary tracking table once no rows are left in it.
def drop_temp_table_if_finished
  return unless UntrackedFile.all.empty?

  UntrackedFile.connection.drop_table(:untracked_files_for_uploads, if_exists: true)
end
end
end
end
lib/gitlab/background_migration/prepare_untracked_uploads.rb
0 → 100644
View file @
978dbbd0
# frozen_string_literal: true
module
Gitlab
module
BackgroundMigration
# This class finds all non-hashed uploaded file paths and saves them to a
# `untracked_files_for_uploads` table.
class
PrepareUntrackedUploads
# rubocop:disable Metrics/ClassLength
# For bulk_queue_background_migration_jobs_by_range
include
Database
::
MigrationHelpers
# Number of file paths handed to each bulk insert.
FIND_BATCH_SIZE = 500
# Upload directory, relative to CarrierWave.root.
RELATIVE_UPLOAD_DIR = "uploads".freeze
ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze
# Background migration scheduled once the file paths are stored.
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze
# Matches the "<CarrierWave.root>/" prefix of an absolute path. The root is
# escaped so that regex metacharacters in the directory name (e.g. "." or
# "+") are matched literally.
START_WITH_CARRIERWAVE_ROOT_REGEX = %r{\A#{Regexp.escape(CarrierWave.root.to_s)}/}
# Glob patterns passed to `find -path` to exclude hashed-storage and
# temporary uploads.
EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
# Model over the temporary `untracked_files_for_uploads` table. It mixes in
# EachBatch so its rows can be iterated in batches.
class UntrackedFile < ActiveRecord::Base
  include EachBatch

  self.table_name = 'untracked_files_for_uploads'
end
# Entry point: makes sure the temporary table exists, fills it with the
# paths of untracked files found on disk and schedules the follow-up jobs.
def perform
  ensure_temporary_tracking_table_exists

  # Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since
  # doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be
  # slow, start with an empty table for Postgres < 9.5.
  # That way we can do bulk inserts at ~30x the speed of individual
  # inserts (~20 minutes worth of inserts at GitLab.com scale instead of
  # ~10 hours).
  # In all other cases, installations will get both bulk inserts and the
  # ability for these jobs to retry without having to clear and reinsert.
  unless can_bulk_insert_and_ignore_duplicates?
    clear_untracked_file_paths
  end

  store_untracked_file_paths

  schedule_populate_untracked_uploads_jobs
end
private
# Creates the `untracked_files_for_uploads` table (a non-null `path` string
# of up to 600 characters with a unique index) unless it already exists.
def ensure_temporary_tracking_table_exists
  table_name = :untracked_files_for_uploads
  connection = UntrackedFile.connection

  return if connection.table_exists?(table_name)

  connection.create_table(table_name) do |t|
    t.string :path, limit: 600, null: false
    t.index :path, unique: true
  end
end
# Removes every row from the temporary tracking table.
def clear_untracked_file_paths
  UntrackedFile.delete_all
end

# Walks the upload directory and inserts the found file paths in batches
# of FIND_BATCH_SIZE. Does nothing when the directory does not exist.
def store_untracked_file_paths
  if Dir.exist?(ABSOLUTE_UPLOAD_DIR)
    each_file_batch(ABSOLUTE_UPLOAD_DIR, FIND_BATCH_SIZE) { |file_paths| insert_file_paths(file_paths) }
  end
end
# Runs the find command over `search_dir` and passes its output to the
# given block in batches of at most `batch_size` paths.
# Raises a RuntimeError if the command exits unsuccessfully (checked after
# all output has been consumed).
def each_file_batch(search_dir, batch_size, &block)
  find_command = build_find_command(search_dir)

  Open3.popen2(*find_command) do |_stdin, find_output, wait_thread|
    yield_paths_in_batches(find_output, batch_size, &block)

    exit_status = wait_thread.value
    raise "Find command failed" unless exit_status.success?
  end
end
# Reads NUL-separated paths from `stdout`, strips the leading
# "<CarrierWave.root>/" from each and yields them in arrays of at most
# `batch_size` elements.
#
# stdout     - IO-like object responding to `each_line(separator)`.
# batch_size - Maximum number of paths per yielded array.
#
# An empty batch is never yielded: when there are no paths at all, or the
# number of paths is an exact multiple of `batch_size`, there is no
# trailing remainder and the caller must not be asked to insert zero rows
# (that would produce an `INSERT ... VALUES ;` statement).
def yield_paths_in_batches(stdout, batch_size, &block)
  paths = []

  stdout.each_line("\0") do |line|
    paths << line.chomp("\0").sub(START_WITH_CARRIERWAVE_ROOT_REGEX, '')

    if paths.size >= batch_size
      yield(paths)
      paths = []
    end
  end

  # Flush the remainder, if any.
  yield(paths) unless paths.empty?
end
# Builds the argv Array for the `find` invocation that lists every regular
# file under `search_dir` (following symlinks, NUL-terminated output),
# excluding the hashed-storage and tmp upload paths. When `ionice` is
# available the command is prefixed with `ionice -c Idle`.
# The final command is written to the Rails log.
def build_find_command(search_dir)
  find = [
    'find', '-L', search_dir.to_s,
    '-type', 'f',
    '!', '(', '-path', EXCLUDED_HASHED_UPLOADS_PATH, '-prune', ')',
    '!', '(', '-path', EXCLUDED_TMP_UPLOADS_PATH, '-prune', ')',
    '-print0'
  ]

  ionice = which_ionice
  command = ionice ? [ionice.to_s, '-c', 'Idle'] + find : find

  Rails.logger.info("PrepareUntrackedUploads find command: \"#{command.join(' ')}\"")

  command
end

# Result of looking up `ionice` via Gitlab::Utils.which, or false when the
# lookup raises.
def which_ionice
  begin
    Gitlab::Utils.which('ionice')
  rescue StandardError
    # In this case, returning false is relatively safe,
    # even though it isn't very nice
    false
  end
end
# Executes a single bulk INSERT for the given file paths.
def insert_file_paths(file_paths)
  ActiveRecord::Base.connection.execute(insert_sql(file_paths))
end

# Returns the INSERT statement for `file_paths`, using the
# duplicate-ignoring variant supported by the current database:
# plain INSERT on PostgreSQL < 9.5, ON CONFLICT DO NOTHING on newer
# PostgreSQL, and INSERT IGNORE otherwise (MySQL).
def insert_sql(file_paths)
  table_columns_and_values = table_columns_and_values_for_insert(file_paths)

  if postgresql_pre_9_5?
    "INSERT INTO #{table_columns_and_values};"
  elsif postgresql?
    "INSERT INTO #{table_columns_and_values} ON CONFLICT DO NOTHING;"
  else
    # MySQL
    "INSERT IGNORE INTO #{table_columns_and_values};"
  end
end

# Returns the "<table> (path) VALUES (...), (...)" fragment, with every
# path passed through ActiveRecord's `sanitize_sql_array`.
def table_columns_and_values_for_insert(file_paths)
  sanitized_tuples = file_paths.map do |file_path|
    ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend, Metrics/LineLength
  end
  values = sanitized_tuples.join(', ')

  "#{UntrackedFile.table_name} (path) VALUES #{values}"
end
# Whether the database is PostgreSQL. Memoized with `defined?` so that a
# `false` answer is cached too (`||=` would re-evaluate it on every call).
def postgresql?
  return @postgresql if defined?(@postgresql)

  @postgresql = Gitlab::Database.postgresql?
end

# Bulk inserts that ignore duplicates are available everywhere except on
# PostgreSQL older than 9.5.
def can_bulk_insert_and_ignore_duplicates?
  !postgresql_pre_9_5?
end

# Whether the database is PostgreSQL with a version below 9.5.
# Memoized with `defined?` so that a `false` answer is cached as well.
def postgresql_pre_9_5?
  return @postgresql_pre_9_5 if defined?(@postgresql_pre_9_5)

  @postgresql_pre_9_5 = postgresql? && Gitlab::Database.version.to_f < 9.5
end
# Queues FOLLOW_UP_MIGRATION jobs over the rows of the temporary tracking
# table, using the by-range helper from Database::MigrationHelpers.
def schedule_populate_untracked_uploads_jobs
  bulk_queue_background_migration_jobs_by_range(
    UntrackedFile,
    FOLLOW_UP_MIGRATION
  )
end
end
end
end
lib/gitlab/database.rb
View file @
978dbbd0
...
@@ -120,15 +120,21 @@ module Gitlab
...
@@ -120,15 +120,21 @@ module Gitlab
# values.
# values.
# return_ids - When set to true the return value will be an Array of IDs of
# return_ids - When set to true the return value will be an Array of IDs of
# the inserted rows, this only works on PostgreSQL.
# the inserted rows, this only works on PostgreSQL.
def
self
.
bulk_insert
(
table
,
rows
,
return_ids:
false
)
# disable_quote - A key or an Array of keys to exclude from quoting (You
# become responsible for protection from SQL injection for
# these keys!)
def
self
.
bulk_insert
(
table
,
rows
,
return_ids:
false
,
disable_quote:
[])
return
if
rows
.
empty?
return
if
rows
.
empty?
keys
=
rows
.
first
.
keys
keys
=
rows
.
first
.
keys
columns
=
keys
.
map
{
|
key
|
connection
.
quote_column_name
(
key
)
}
columns
=
keys
.
map
{
|
key
|
connection
.
quote_column_name
(
key
)
}
return_ids
=
false
if
mysql?
return_ids
=
false
if
mysql?
disable_quote
=
Array
(
disable_quote
).
to_set
tuples
=
rows
.
map
do
|
row
|
tuples
=
rows
.
map
do
|
row
|
row
.
values_at
(
*
keys
).
map
{
|
value
|
connection
.
quote
(
value
)
}
keys
.
map
do
|
k
|
disable_quote
.
include?
(
k
)
?
row
[
k
]
:
connection
.
quote
(
row
[
k
])
end
end
end
sql
=
<<-
EOF
sql
=
<<-
EOF
...
...
lib/gitlab/utils.rb
View file @
978dbbd0
...
@@ -47,13 +47,6 @@ module Gitlab
...
@@ -47,13 +47,6 @@ module Gitlab
Random
.
rand
(
Float
::
MAX
.
to_i
).
to_s
(
36
)
Random
.
rand
(
Float
::
MAX
.
to_i
).
to_s
(
36
)
end
end
# EE below
def
try_megabytes_to_bytes
(
size
)
Integer
(
size
).
megabytes
rescue
ArgumentError
size
end
# See: http://stackoverflow.com/questions/2108727/which-in-ruby-checking-if-program-exists-in-path-from-ruby
# See: http://stackoverflow.com/questions/2108727/which-in-ruby-checking-if-program-exists-in-path-from-ruby
# Cross-platform way of finding an executable in the $PATH.
# Cross-platform way of finding an executable in the $PATH.
#
#
...
@@ -70,5 +63,12 @@ module Gitlab
...
@@ -70,5 +63,12 @@ module Gitlab
nil
nil
end
end
# EE below
def
try_megabytes_to_bytes
(
size
)
Integer
(
size
).
megabytes
rescue
ArgumentError
size
end
end
end
end
end
spec/lib/gitlab/background_migration/populate_untracked_uploads_spec.rb
0 → 100644
View file @
978dbbd0
This diff is collapsed.
Click to expand it.
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
0 → 100644
View file @
978dbbd0
require
'spec_helper'
describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
  include TrackUntrackedUploadsHelpers

  let!(:untracked_files_for_uploads) { described_class::UntrackedFile }

  # Passes when a BackgroundMigrationWorker job was enqueued with the given
  # migration name and exactly the expected arguments.
  matcher :be_scheduled_migration do |*expected|
    match do |migration|
      BackgroundMigrationWorker.jobs.any? do |job|
        job['args'] == [migration, expected]
      end
    end

    failure_message do |migration|
      "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
    end
  end

  # The scenarios below are identical for both bulk-insert strategies, so
  # they are defined once and included in each strategy's context.
  shared_examples 'storing untracked unhashed upload paths' do
    context 'when files were uploaded before and after hashed storage was enabled' do
      let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
      let!(:user) { create(:user, :with_avatar) }
      let!(:project1) { create(:project, :with_avatar) }
      let(:project2) { create(:project) } # instantiate after enabling hashed_storage

      before do
        # Markdown upload before enabling hashed_storage
        UploadService.new(project1, uploaded_file, FileUploader).execute

        stub_application_setting(hashed_storage_enabled: true)

        # Markdown upload after enabling hashed_storage
        UploadService.new(project2, uploaded_file, FileUploader).execute
      end

      it 'adds unhashed files to the untracked_files_for_uploads table' do
        described_class.new.perform

        expect(untracked_files_for_uploads.count).to eq(5)
      end

      it 'adds files with paths relative to CarrierWave.root' do
        described_class.new.perform

        untracked_files_for_uploads.all.each do |file|
          expect(file.path.start_with?('uploads/')).to be_truthy
        end
      end

      it 'does not add hashed files to the untracked_files_for_uploads table' do
        described_class.new.perform

        hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path
        expect(untracked_files_for_uploads.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey
      end

      it 'correctly schedules the follow-up background migration jobs' do
        described_class.new.perform

        expect(described_class::FOLLOW_UP_MIGRATION).to be_scheduled_migration(1, 5)
        expect(BackgroundMigrationWorker.jobs.size).to eq(1)
      end

      # E.g. from a previous failed run of this background migration
      context 'when there is existing data in untracked_files_for_uploads' do
        before do
          described_class.new.perform
        end

        it 'does not error or produce duplicates of existing data' do
          expect do
            described_class.new.perform
          end.not_to change { untracked_files_for_uploads.count }.from(5)
        end
      end

      # E.g. The installation is in use at the time of migration, and someone has
      # just uploaded a file
      context 'when there are files in /uploads/tmp' do
        let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }

        before do
          # The tmp directory may not exist yet on a fresh checkout.
          FileUtils.mkdir_p(File.dirname(tmp_file))
          FileUtils.touch(tmp_file)
        end

        after do
          FileUtils.rm(tmp_file)
        end

        it 'does not add files from /uploads/tmp' do
          described_class.new.perform

          expect(untracked_files_for_uploads.count).to eq(5)
        end
      end
    end
  end

  before do
    DatabaseCleaner.clean

    drop_temp_table_if_exists
  end

  after do
    drop_temp_table_if_exists
  end

  around do |example|
    # Especially important so the follow-up migration does not get run
    Sidekiq::Testing.fake! do
      example.run
    end
  end

  it 'ensures the untracked_files_for_uploads table exists' do
    expect do
      described_class.new.perform
    end.to change { ActiveRecord::Base.connection.table_exists?(:untracked_files_for_uploads) }.from(false).to(true)
  end

  it 'has a path field long enough for really long paths' do
    described_class.new.perform

    component = 'a' * 255

    long_path = [
      'uploads',
      component, # project.full_path
      component  # filename
    ].flatten.join('/')

    record = untracked_files_for_uploads.create!(path: long_path)
    expect(record.reload.path.size).to eq(519)
  end

  context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE" do
    around do |example|
      # If this is CI, we use Postgres 9.2 so this whole context should be
      # skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
      if described_class.new.send(:can_bulk_insert_and_ignore_duplicates?)
        example.run
      end
    end

    include_examples 'storing untracked unhashed upload paths'
  end

  context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
    before do
      # If this is CI, we use Postgres 9.2 so this stub has no effect.
      #
      # If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
      # to test the bulk insert functionality without ON CONFLICT DO NOTHING or
      # IGNORE.
      allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
    end

    include_examples 'storing untracked unhashed upload paths'
  end

  # Very new or lightly-used installations that are running this migration
  # may not have an upload directory because they have no uploads.
  context 'when no files were ever uploaded' do
    it 'does not add to the untracked_files_for_uploads table (and does not raise error)' do
      described_class.new.perform

      expect(untracked_files_for_uploads.count).to eq(0)
    end
  end
end
spec/lib/gitlab/database_spec.rb
View file @
978dbbd0
...
@@ -199,6 +199,22 @@ describe Gitlab::Database do
...
@@ -199,6 +199,22 @@ describe Gitlab::Database do
described_class
.
bulk_insert
(
'test'
,
rows
)
described_class
.
bulk_insert
(
'test'
,
rows
)
end
end
it
'does not quote values of a column in the disable_quote option'
do
[
1
,
2
,
4
,
5
].
each
do
|
i
|
expect
(
connection
).
to
receive
(
:quote
).
with
(
i
)
end
described_class
.
bulk_insert
(
'test'
,
rows
,
disable_quote: :c
)
end
it
'does not quote values of columns in the disable_quote option'
do
[
2
,
5
].
each
do
|
i
|
expect
(
connection
).
to
receive
(
:quote
).
with
(
i
)
end
described_class
.
bulk_insert
(
'test'
,
rows
,
disable_quote:
[
:a
,
:c
])
end
it
'handles non-UTF-8 data'
do
it
'handles non-UTF-8 data'
do
expect
{
described_class
.
bulk_insert
(
'test'
,
[{
a:
"
\255
"
}])
}.
not_to
raise_error
expect
{
described_class
.
bulk_insert
(
'test'
,
[{
a:
"
\255
"
}])
}.
not_to
raise_error
end
end
...
...
spec/lib/gitlab/utils_spec.rb
View file @
978dbbd0
require
'spec_helper'
require
'spec_helper'
describe
Gitlab
::
Utils
do
describe
Gitlab
::
Utils
do
delegate
:to_boolean
,
:boolean_to_yes_no
,
:slugify
,
:random_string
,
to: :described_class
delegate
:to_boolean
,
:boolean_to_yes_no
,
:slugify
,
:random_string
,
:which
,
to: :described_class
describe
'.slugify'
do
describe
'.slugify'
do
{
{
...
@@ -60,9 +60,6 @@ describe Gitlab::Utils do
...
@@ -60,9 +60,6 @@ describe Gitlab::Utils do
end
end
end
end
# EE
delegate
:which
,
to: :described_class
describe
'.which'
do
describe
'.which'
do
it
'finds the full path to an executable binary'
do
it
'finds the full path to an executable binary'
do
expect
(
File
).
to
receive
(
:executable?
).
with
(
'/bin/sh'
).
and_return
(
true
)
expect
(
File
).
to
receive
(
:executable?
).
with
(
'/bin/sh'
).
and_return
(
true
)
...
...
spec/migrations/track_untracked_uploads_spec.rb
0 → 100644
View file @
978dbbd0
require
'spec_helper'
require
Rails
.
root
.
join
(
'db'
,
'post_migrate'
,
'20171103140253_track_untracked_uploads'
)
describe TrackUntrackedUploads, :migration, :sidekiq do
  include TrackUntrackedUploadsHelpers

  # Passes when a BackgroundMigrationWorker job was enqueued whose only
  # argument is the given migration name.
  matcher :be_scheduled_migration do
    match do |migration|
      scheduled_args = BackgroundMigrationWorker.jobs.map { |job| job['args'] }
      scheduled_args.include?([migration])
    end

    failure_message do |migration|
      "Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
    end
  end

  it 'correctly schedules the follow-up background migration' do
    # Fake mode keeps the enqueued job from actually being executed.
    Sidekiq::Testing.fake! do
      migrate!

      expect(described_class::MIGRATION).to be_scheduled_migration
      expect(BackgroundMigrationWorker.jobs.size).to eq(1)
    end
  end
end
spec/support/track_untracked_uploads_helpers.rb
0 → 100644
View file @
978dbbd0
# Helpers shared by the specs of the untracked-uploads migrations.
module TrackUntrackedUploadsHelpers
  # Fixture upload built from spec/fixtures/rails_sample.jpg.
  def uploaded_file
    fixture_file_upload(Rails.root.join('spec', 'fixtures', 'rails_sample.jpg'))
  end

  # Invokes the private table-creation step of PrepareUntrackedUploads.
  def ensure_temporary_tracking_table_exists
    migration = Gitlab::BackgroundMigration::PrepareUntrackedUploads.new
    migration.send(:ensure_temporary_tracking_table_exists)
  end

  # Drops the temporary tracking table when it is present.
  def drop_temp_table_if_exists
    return unless ActiveRecord::Base.connection.table_exists?(:untracked_files_for_uploads)

    ActiveRecord::Base.connection.drop_table(:untracked_files_for_uploads)
  end

  # Updates the first Appearance record with `attrs`, initializing one with
  # a placeholder title and description if none exists. Returns the record.
  def create_or_update_appearance(attrs)
    Appearance.first_or_initialize(title: 'foo', description: 'bar').tap do |appearance|
      appearance.update!(attrs)
    end
  end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment