Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
74b3870a
Commit
74b3870a
authored
Nov 26, 2017
by
Michael Kozono
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Address Rubocop offenses
parent
dd4b35f8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
82 additions
and
39 deletions
+82
-39
lib/gitlab/background_migration/populate_untracked_uploads.rb
...gitlab/background_migration/populate_untracked_uploads.rb
+43
-21
lib/gitlab/background_migration/prepare_untracked_uploads.rb
lib/gitlab/background_migration/prepare_untracked_uploads.rb
+39
-18
No files found.
lib/gitlab/background_migration/populate_untracked_uploads.rb
View file @
74b3870a
# frozen_string_literal: true
module
Gitlab
module
Gitlab
module
BackgroundMigration
module
BackgroundMigration
class
PopulateUntrackedUploads
# This class processes a batch of rows in `untracked_files_for_uploads` by
class
UntrackedFile
<
ActiveRecord
::
Base
# adding each file to the `uploads` table if it does not exist.
class
PopulateUntrackedUploads
# rubocop:disable Metrics/ClassLength
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class
UntrackedFile
<
ActiveRecord
::
Base
# rubocop:disable Metrics/ClassLength, Metrics/LineLength
self
.
table_name
=
'untracked_files_for_uploads'
self
.
table_name
=
'untracked_files_for_uploads'
# Ends with /:random_hex/:filename
# Ends with /:random_hex/:filename
FILE_UPLOADER_PATH
_PATTERN
=
%r{/
\h
+/[^/]+
\z
}
FILE_UPLOADER_PATH
=
%r{/
\h
+/[^/]+
\z
}
F
ILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN
=
%r{
\A
(.+)
#{
FILE_UPLOADER_PATH_PATTERN
}
}
F
ULL_PATH_CAPTURE
=
%r{
\A
(.+)
#{
FILE_UPLOADER_PATH
}
}
# These regex patterns are tested against a relative path, relative to
# These regex patterns are tested against a relative path, relative to
# the upload directory.
# the upload directory.
...
@@ -44,7 +50,7 @@ module Gitlab
...
@@ -44,7 +50,7 @@ module Gitlab
model_type:
'Project'
model_type:
'Project'
},
},
{
{
pattern:
FILE_UPLOADER_PATH
_PATTERN
,
pattern:
FILE_UPLOADER_PATH
,
uploader:
'FileUploader'
,
uploader:
'FileUploader'
,
model_type:
'Project'
model_type:
'Project'
}
}
...
@@ -63,13 +69,14 @@ module Gitlab
...
@@ -63,13 +69,14 @@ module Gitlab
def
upload_path
def
upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
# UntrackedFile#path is absolute, but Upload#path depends on uploader
@upload_path
||=
if
uploader
==
'FileUploader'
@upload_path
||=
# Path relative to project directory in uploads
if
uploader
==
'FileUploader'
matchd
=
path_relative_to_upload_dir
.
match
(
FILE_UPLOADER_PATH_PATTERN
)
# Path relative to project directory in uploads
matchd
[
0
].
sub
(
%r{
\A
/}
,
''
)
# remove leading slash
matchd
=
path_relative_to_upload_dir
.
match
(
FILE_UPLOADER_PATH
)
else
matchd
[
0
].
sub
(
%r{
\A
/}
,
''
)
# remove leading slash
path
else
end
path
end
end
end
def
uploader
def
uploader
...
@@ -83,7 +90,8 @@ module Gitlab
...
@@ -83,7 +90,8 @@ module Gitlab
def
model_id
def
model_id
return
@model_id
if
defined?
(
@model_id
)
return
@model_id
if
defined?
(
@model_id
)
matchd
=
path_relative_to_upload_dir
.
match
(
matching_pattern_map
[
:pattern
])
pattern
=
matching_pattern_map
[
:pattern
]
matchd
=
path_relative_to_upload_dir
.
match
(
pattern
)
# If something is captured (matchd[1] is not nil), it is a model_id
# If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID
# Only the FileUploader pattern will not match an ID
...
@@ -105,14 +113,20 @@ module Gitlab
...
@@ -105,14 +113,20 @@ module Gitlab
path_relative_to_upload_dir
.
match
(
path_pattern_map
[
:pattern
])
path_relative_to_upload_dir
.
match
(
path_pattern_map
[
:pattern
])
end
end
raise
"Unknown upload path pattern
\"
#{
path
}
\"
"
unless
@matching_pattern_map
unless
@matching_pattern_map
raise
"Unknown upload path pattern
\"
#{
path
}
\"
"
end
@matching_pattern_map
@matching_pattern_map
end
end
def
file_uploader_model_id
def
file_uploader_model_id
matchd
=
path_relative_to_upload_dir
.
match
(
FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN
)
matchd
=
path_relative_to_upload_dir
.
match
(
FULL_PATH_CAPTURE
)
raise
"Could not capture project full_path from a FileUploader path:
\"
#{
path_relative_to_upload_dir
}
\"
"
unless
matchd
not_found_msg
=
<<~
MSG
Could not capture project full_path from a FileUploader path:
"
#{
path_relative_to_upload_dir
}
"
MSG
raise
not_found_msg
unless
matchd
full_path
=
matchd
[
1
]
full_path
=
matchd
[
1
]
project
=
Project
.
find_by_full_path
(
full_path
)
project
=
Project
.
find_by_full_path
(
full_path
)
...
@@ -123,7 +137,8 @@ module Gitlab
...
@@ -123,7 +137,8 @@ module Gitlab
# Not including a leading slash
# Not including a leading slash
def
path_relative_to_upload_dir
def
path_relative_to_upload_dir
base
=
%r{
\A
#{
Regexp
.
escape
(
Gitlab
::
BackgroundMigration
::
PrepareUntrackedUploads
::
RELATIVE_UPLOAD_DIR
)
}
/}
upload_dir
=
Gitlab
::
BackgroundMigration
::
PrepareUntrackedUploads
::
RELATIVE_UPLOAD_DIR
# rubocop:disable Metrics/LineLength
base
=
%r{
\A
#{
Regexp
.
escape
(
upload_dir
)
}
/}
@path_relative_to_upload_dir
||=
path
.
sub
(
base
,
''
)
@path_relative_to_upload_dir
||=
path
.
sub
(
base
,
''
)
end
end
...
@@ -132,6 +147,7 @@ module Gitlab
...
@@ -132,6 +147,7 @@ module Gitlab
end
end
end
end
# This class is used to query the `uploads` table.
class
Upload
<
ActiveRecord
::
Base
class
Upload
<
ActiveRecord
::
Base
self
.
table_name
=
'uploads'
self
.
table_name
=
'uploads'
end
end
...
@@ -192,8 +208,10 @@ module Gitlab
...
@@ -192,8 +208,10 @@ module Gitlab
end
end
ids
.
each
do
|
model_type
,
model_ids
|
ids
.
each
do
|
model_type
,
model_ids
|
found_ids
=
Object
.
const_get
(
model_type
).
where
(
id:
model_ids
.
uniq
).
pluck
(
:id
)
model_class
=
Object
.
const_get
(
model_type
)
ids
[
model_type
]
=
ids
[
model_type
]
-
found_ids
# replace with deleted ids
found_ids
=
model_class
.
where
(
id:
model_ids
.
uniq
).
pluck
(
:id
)
deleted_ids
=
ids
[
model_type
]
-
found_ids
ids
[
model_type
]
=
deleted_ids
end
end
ids
ids
...
@@ -204,11 +222,15 @@ module Gitlab
...
@@ -204,11 +222,15 @@ module Gitlab
file
.
to_h
.
merge
(
created_at:
'NOW()'
)
file
.
to_h
.
merge
(
created_at:
'NOW()'
)
end
end
Gitlab
::
Database
.
bulk_insert
(
'uploads'
,
rows
,
disable_quote: :created_at
)
Gitlab
::
Database
.
bulk_insert
(
'uploads'
,
rows
,
disable_quote: :created_at
)
end
end
def
drop_temp_table_if_finished
def
drop_temp_table_if_finished
UntrackedFile
.
connection
.
drop_table
(
:untracked_files_for_uploads
)
if
UntrackedFile
.
all
.
empty?
if
UntrackedFile
.
all
.
empty?
UntrackedFile
.
connection
.
drop_table
(
:untracked_files_for_uploads
)
end
end
end
end
end
end
end
...
...
lib/gitlab/background_migration/prepare_untracked_uploads.rb
View file @
74b3870a
# frozen_string_literal: true
module
Gitlab
module
Gitlab
module
BackgroundMigration
module
BackgroundMigration
class
PrepareUntrackedUploads
# This class finds all non-hashed uploaded file paths and saves them to a
# `untracked_files_for_uploads` table.
class
PrepareUntrackedUploads
# rubocop:disable Metrics/ClassLength
# For bulk_queue_background_migration_jobs_by_range
# For bulk_queue_background_migration_jobs_by_range
include
Database
::
MigrationHelpers
include
Database
::
MigrationHelpers
FI
LE_PATH
_BATCH_SIZE
=
500
FI
ND
_BATCH_SIZE
=
500
RELATIVE_UPLOAD_DIR
=
"uploads"
.
freeze
RELATIVE_UPLOAD_DIR
=
"uploads"
.
freeze
ABSOLUTE_UPLOAD_DIR
=
"
#{
CarrierWave
.
root
}
/
#{
RELATIVE_UPLOAD_DIR
}
"
.
freeze
ABSOLUTE_UPLOAD_DIR
=
"
#{
CarrierWave
.
root
}
/
#{
RELATIVE_UPLOAD_DIR
}
"
.
freeze
FOLLOW_UP_MIGRATION
=
'PopulateUntrackedUploads'
.
freeze
FOLLOW_UP_MIGRATION
=
'PopulateUntrackedUploads'
.
freeze
...
@@ -12,6 +16,8 @@ module Gitlab
...
@@ -12,6 +16,8 @@ module Gitlab
EXCLUDED_HASHED_UPLOADS_PATH
=
"
#{
ABSOLUTE_UPLOAD_DIR
}
/@hashed/*"
.
freeze
EXCLUDED_HASHED_UPLOADS_PATH
=
"
#{
ABSOLUTE_UPLOAD_DIR
}
/@hashed/*"
.
freeze
EXCLUDED_TMP_UPLOADS_PATH
=
"
#{
ABSOLUTE_UPLOAD_DIR
}
/tmp/*"
.
freeze
EXCLUDED_TMP_UPLOADS_PATH
=
"
#{
ABSOLUTE_UPLOAD_DIR
}
/tmp/*"
.
freeze
# This class is used to iterate over batches of
# `untracked_files_for_uploads` rows.
class
UntrackedFile
<
ActiveRecord
::
Base
class
UntrackedFile
<
ActiveRecord
::
Base
include
EachBatch
include
EachBatch
...
@@ -39,8 +45,9 @@ module Gitlab
...
@@ -39,8 +45,9 @@ module Gitlab
private
private
def
ensure_temporary_tracking_table_exists
def
ensure_temporary_tracking_table_exists
unless
UntrackedFile
.
connection
.
table_exists?
(
:untracked_files_for_uploads
)
table_name
=
:untracked_files_for_uploads
UntrackedFile
.
connection
.
create_table
:untracked_files_for_uploads
do
|
t
|
unless
UntrackedFile
.
connection
.
table_exists?
(
table_name
)
UntrackedFile
.
connection
.
create_table
table_name
do
|
t
|
t
.
string
:path
,
limit:
600
,
null:
false
t
.
string
:path
,
limit:
600
,
null:
false
t
.
index
:path
,
unique:
true
t
.
index
:path
,
unique:
true
end
end
...
@@ -54,7 +61,7 @@ module Gitlab
...
@@ -54,7 +61,7 @@ module Gitlab
def
store_untracked_file_paths
def
store_untracked_file_paths
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
each_file_batch
(
ABSOLUTE_UPLOAD_DIR
,
FI
LE_PATH
_BATCH_SIZE
)
do
|
file_paths
|
each_file_batch
(
ABSOLUTE_UPLOAD_DIR
,
FI
ND
_BATCH_SIZE
)
do
|
file_paths
|
insert_file_paths
(
file_paths
)
insert_file_paths
(
file_paths
)
end
end
end
end
...
@@ -85,12 +92,17 @@ module Gitlab
...
@@ -85,12 +92,17 @@ module Gitlab
end
end
def
build_find_command
(
search_dir
)
def
build_find_command
(
search_dir
)
cmd
=
%W[find
#{
search_dir
}
-type f ! ( -path
#{
EXCLUDED_HASHED_UPLOADS_PATH
}
-prune ) ! ( -path
#{
EXCLUDED_TMP_UPLOADS_PATH
}
-prune ) -print0]
cmd
=
%W[find
#{
search_dir
}
-type f
! ( -path
#{
EXCLUDED_HASHED_UPLOADS_PATH
}
-prune )
! ( -path
#{
EXCLUDED_TMP_UPLOADS_PATH
}
-prune )
-print0]
ionice
=
which_ionice
ionice
=
which_ionice
cmd
=
%W[
#{
ionice
}
-c Idle]
+
cmd
if
ionice
cmd
=
%W[
#{
ionice
}
-c Idle]
+
cmd
if
ionice
Rails
.
logger
.
info
"PrepareUntrackedUploads find command:
\"
#{
cmd
.
join
(
' '
)
}
\"
"
log_msg
=
"PrepareUntrackedUploads find command:
\"
#{
cmd
.
join
(
' '
)
}
\"
"
Rails
.
logger
.
info
log_msg
cmd
cmd
end
end
...
@@ -98,25 +110,32 @@ module Gitlab
...
@@ -98,25 +110,32 @@ module Gitlab
def
which_ionice
def
which_ionice
Gitlab
::
Utils
.
which
(
'ionice'
)
Gitlab
::
Utils
.
which
(
'ionice'
)
rescue
StandardError
rescue
StandardError
# In this case, returning false is relatively safe, even though it isn't very nice
# In this case, returning false is relatively safe,
# even though it isn't very nice
false
false
end
end
def
insert_file_paths
(
file_paths
)
def
insert_file_paths
(
file_paths
)
sql
=
if
postgresql_pre_9_5?
sql
=
insert_sql
(
file_paths
)
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
elsif
postgresql?
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
ON CONFLICT DO NOTHING;"
else
# MySQL
"INSERT IGNORE INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
end
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
end
end
def
insert_sql
(
file_paths
)
if
postgresql_pre_9_5?
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
elsif
postgresql?
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
"
\
" ON CONFLICT DO NOTHING;"
else
# MySQL
"INSERT IGNORE INTO"
\
"
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
end
end
def
table_columns_and_values_for_insert
(
file_paths
)
def
table_columns_and_values_for_insert
(
file_paths
)
values
=
file_paths
.
map
do
|
file_path
|
values
=
file_paths
.
map
do
|
file_path
|
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
'(?)'
,
file_path
])
# rubocop:disable GitlabSecurity/PublicSend
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
'(?)'
,
file_path
])
# rubocop:disable GitlabSecurity/PublicSend
, Metrics/LineLength
end
.
join
(
', '
)
end
.
join
(
', '
)
"
#{
UntrackedFile
.
table_name
}
(path) VALUES
#{
values
}
"
"
#{
UntrackedFile
.
table_name
}
(path) VALUES
#{
values
}
"
...
@@ -131,11 +150,13 @@ module Gitlab
...
@@ -131,11 +150,13 @@ module Gitlab
end
end
def
postgresql_pre_9_5?
def
postgresql_pre_9_5?
@postgresql_pre_9_5
||=
postgresql?
&&
Gitlab
::
Database
.
version
.
to_f
<
9.5
@postgresql_pre_9_5
||=
postgresql?
&&
Gitlab
::
Database
.
version
.
to_f
<
9.5
end
end
def
schedule_populate_untracked_uploads_jobs
def
schedule_populate_untracked_uploads_jobs
bulk_queue_background_migration_jobs_by_range
(
UntrackedFile
,
FOLLOW_UP_MIGRATION
)
bulk_queue_background_migration_jobs_by_range
(
UntrackedFile
,
FOLLOW_UP_MIGRATION
)
end
end
end
end
end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment