Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
19966e70
Commit
19966e70
authored
Jul 05, 2018
by
Oswaldo Ferreira
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Wait postgresql vacuum of deadtuples on merge_request_diff_files deletion
parent
4455904b
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
87 additions
and
180 deletions
+87
-180
db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
...grate/20180619121030_enqueue_delete_diff_files_workers.rb
+3
-12
lib/gitlab/background_migration/delete_diff_files.rb
lib/gitlab/background_migration/delete_diff_files.rb
+58
-25
lib/gitlab/background_migration/schedule_diff_files_deletion.rb
...tlab/background_migration/schedule_diff_files_deletion.rb
+0
-61
spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
...lib/gitlab/background_migration/delete_diff_files_spec.rb
+24
-7
spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
...background_migration/schedule_diff_files_deletion_spec.rb
+0
-73
spec/migrations/enqueue_delete_diff_files_workers_spec.rb
spec/migrations/enqueue_delete_diff_files_workers_spec.rb
+2
-2
No files found.
db/post_migrate/20180619121030_enqueue_delete_diff_files_workers.rb
View file @
19966e70
...
...
@@ -2,24 +2,15 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
include
Gitlab
::
Database
::
MigrationHelpers
DOWNTIME
=
false
SCHEDULER
=
'ScheduleDiffFilesDeletion'
.
freeze
TMP_INDEX
=
'tmp_partial_diff_id_with_files_index'
.
freeze
MIGRATION
=
'DeleteDiffFiles'
.
freeze
disable_ddl_transaction!
def
up
unless
index_exists_by_name?
(
:merge_request_diffs
,
TMP_INDEX
)
add_concurrent_index
(
:merge_request_diffs
,
:id
,
where:
"(state NOT IN ('without_files', 'empty'))"
,
name:
TMP_INDEX
)
end
# We keep the index since this will be used async.
# Ideally we should remove it in an upcoming release.
BackgroundMigrationWorker
.
perform_async
(
SCHEDULER
)
BackgroundMigrationWorker
.
perform_async
(
MIGRATION
)
end
def
down
if
index_exists_by_name?
(
:merge_request_diffs
,
TMP_INDEX
)
remove_concurrent_index_by_name
(
:merge_request_diffs
,
TMP_INDEX
)
end
# no-op
end
end
lib/gitlab/background_migration/delete_diff_files.rb
View file @
19966e70
...
...
@@ -8,6 +8,7 @@ module Gitlab
self
.
table_name
=
'merge_request_diffs'
belongs_to
:merge_request
has_many
:merge_request_diff_files
include
EachBatch
end
...
...
@@ -18,41 +19,73 @@ module Gitlab
include
EachBatch
end
def
perform
(
merge_request_diff_id
)
merge_request_diff
=
MergeRequestDiff
.
find_by
(
id:
merge_request_diff_id
)
BATCH
=
5_000
DEAD_TUPLES_THRESHOLD
=
50_000
VACUUM_WAIT_TIME
=
5
.
minutes
return
unless
merge_request_diff
return
unless
should_delete_diff_files?
(
merge_request_diff
)
def
perform
diffs_with_files
=
MergeRequestDiff
.
joins
(
:merge_request
)
.
where
(
"merge_requests.state = 'merged'"
)
.
where
(
'merge_requests.latest_merge_request_diff_id IS NOT NULL'
)
.
where
(
'merge_requests.latest_merge_request_diff_id != merge_request_diffs.id'
)
.
where
(
"merge_request_diffs.state NOT IN ('without_files', 'empty')"
)
MergeRequestDiff
.
transaction
do
merge_request_diff
.
update_column
(
:state
,
'without_files'
)
diffs_with_files
.
each_batch
(
of:
BATCH
)
do
|
batch
,
index
|
wait_deadtuple_vacuum
(
index
)
prune_diff_files
(
batch
,
index
)
end
end
def
wait_deadtuple_vacuum
(
index
)
db_klass
=
Gitlab
::
Database
# explain (analyze, buffers) when deleting 453 diff files:
#
# Delete on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actual time=43.265..43.265 rows=0 loops=1)
# Buffers: shared hit=2043 read=259 dirtied=254
# -> Index Scan using index_merge_request_diff_files_on_mr_diff_id_and_order on merge_request_diff_files (cost=0.57..8487.35 rows=4846 width=6) (actu
# al time=0.466..26.317 rows=453 loops=1)
# Index Cond: (merge_request_diff_id = 463448)
# Buffers: shared hit=17 read=84
# Planning time: 0.107 ms
# Execution time: 43.287 ms
#
MergeRequestDiffFile
.
where
(
merge_request_diff_id:
merge_request_diff
.
id
).
delete_all
if
defined?
(
db_klass
)
&&
db_klass
.
respond_to?
(
:postgresql?
)
&&
db_klass
.
postgresql?
while
diff_files_dead_tuples_count
>=
DEAD_TUPLES_THRESHOLD
log_info
(
"Dead tuple threshold hit on merge_request_diff_files (
#{
index
}
th batch): "
\
"
#{
diff_files_dead_tuples_count
}
, waiting 5 minutes"
)
sleep
VACUUM_WAIT_TIME
end
end
end
private
def
should_delete_diff_files?
(
merge_request_diff
)
return
false
if
merge_request_diff
.
state
==
'without_files'
def
diff_files_dead_tuples_count
dead_tuple
=
execute_statement
(
"SELECT n_dead_tup FROM pg_stat_all_tables "
\
"WHERE relname = 'merge_request_diff_files'"
)[
0
]
if
dead_tuple
.
present?
dead_tuple
[
'n_dead_tup'
].
to_i
else
0
end
end
merge_request
=
merge_request_diff
.
merge_request
def
prune_diff_files
(
batch
,
index
)
diff_ids
=
batch
.
pluck
(
:id
)
return
false
unless
merge_request
.
state
==
'merged'
return
false
if
merge_request_diff
.
id
==
merge_request
.
latest_merge_request_diff_id
removed
=
0
updated
=
0
MergeRequestDiff
.
transaction
do
updated
=
MergeRequestDiff
.
where
(
id:
diff_ids
)
.
update_all
(
state:
'without_files'
)
removed
=
MergeRequestDiffFile
.
where
(
merge_request_diff_id:
diff_ids
)
.
delete_all
end
log_info
(
"
#{
index
}
th batch - Removed
#{
removed
}
merge_request_diff_files rows, "
\
"updated
#{
updated
}
merge_request_diffs rows"
)
end
def
execute_statement
(
sql
)
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
end
true
def
log_info
(
message
)
Rails
.
logger
.
info
(
"BackgroundMigration::DeleteDiffFiles -
#{
message
}
"
)
end
end
end
...
...
lib/gitlab/background_migration/schedule_diff_files_deletion.rb
deleted
100644 → 0
View file @
4455904b
# frozen_string_literal: true
# rubocop:disable Metrics/AbcSize
# rubocop:disable Style/Documentation
module
Gitlab
module
BackgroundMigration
class
ScheduleDiffFilesDeletion
class
MergeRequestDiff
<
ActiveRecord
::
Base
self
.
table_name
=
'merge_request_diffs'
has_many
:merge_request_diff_files
include
EachBatch
end
ITERATION_BATCH
=
1000
DELETION_BATCH
=
1000
# per minute
MIGRATION
=
'DeleteDiffFiles'
# Considering query times and Redis writings, this should take around 2
# hours to complete.
def
perform
diffs_with_files
=
MergeRequestDiff
.
where
.
not
(
state:
%w(without_files empty)
)
# This will be increased for each scheduled job
process_job_in
=
1
.
second
# explain (analyze, buffers) example for the iteration:
#
# Index Only Scan using tmp_index_20013 on merge_request_diffs (cost=0.43..1630.19 rows=60567 width=4) (actual time=0.047..9.572 rows=56976 loops=1)
# Index Cond: ((id >= 764586) AND (id < 835298))
# Heap Fetches: 8
# Buffers: shared hit=18188
# Planning time: 0.752 ms
# Execution time: 12.430 ms
#
diffs_with_files
.
reorder
(
nil
).
each_batch
(
of:
ITERATION_BATCH
)
do
|
relation
,
scheduler_index
|
relation
.
each
do
|
diff
|
BackgroundMigrationWorker
.
perform_in
(
process_job_in
,
MIGRATION
,
[
diff
.
id
])
diff_files_count
=
diff
.
merge_request_diff_files
.
reorder
(
nil
).
count
# We should limit on 1000 diff files deletion per minute to avoid
# replication lag issues.
#
interval
=
(
diff_files_count
.
to_f
/
DELETION_BATCH
).
minutes
process_job_in
+=
interval
end
end
log_days_to_process_all_jobs
(
process_job_in
)
end
def
log_days_to_process_all_jobs
(
seconds_to_process
)
days_to_process_all_jobs
=
(
seconds_to_process
/
60
/
60
/
24
).
to_i
Rails
.
logger
.
info
(
"Gitlab::BackgroundMigration::DeleteDiffFiles will take "
\
"
#{
days_to_process_all_jobs
}
days to be processed"
)
end
end
end
end
spec/lib/gitlab/background_migration/delete_diff_files_spec.rb
View file @
19966e70
...
...
@@ -4,19 +4,19 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
describe
'#perform'
do
context
'when diff files can be deleted'
do
let
(
:merge_request
)
{
create
(
:merge_request
,
:merged
)
}
let
(
:merge_request_diff
)
do
let
!
(
:merge_request_diff
)
do
merge_request
.
create_merge_request_diff
merge_request
.
merge_request_diffs
.
first
end
it
'deletes all merge request diff files'
do
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
to
change
{
merge_request_diff
.
merge_request_diff_files
.
count
}
.
from
(
20
).
to
(
0
)
end
it
'updates state to without_files'
do
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
to
change
{
merge_request_diff
.
reload
.
state
}
.
from
(
'collected'
).
to
(
'without_files'
)
end
...
...
@@ -25,7 +25,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
expect
(
described_class
::
MergeRequestDiffFile
).
to
receive_message_chain
(
:where
,
:delete_all
)
.
and_raise
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
to
raise_error
merge_request_diff
.
reload
...
...
@@ -40,7 +40,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request
.
create_merge_request_diff
merge_request_diff
=
merge_request
.
merge_request_diffs
.
first
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
not_to
change
{
merge_request_diff
.
merge_request_diff_files
.
count
}
.
from
(
20
)
end
...
...
@@ -52,7 +52,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request_diff
.
clean!
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
not_to
change
{
merge_request_diff
.
merge_request_diff_files
.
count
}
.
from
(
20
)
end
...
...
@@ -61,9 +61,26 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, schema: 20180
merge_request
=
create
(
:merge_request
,
:merged
)
merge_request_diff
=
merge_request
.
merge_request_diff
expect
{
described_class
.
new
.
perform
(
merge_request_diff
.
id
)
}
expect
{
described_class
.
new
.
perform
}
.
not_to
change
{
merge_request_diff
.
merge_request_diff_files
.
count
}
.
from
(
20
)
end
end
describe
'#wait_deadtuple_vacuum'
do
it
'sleeps process for VACUUM_WAIT_TIME when hitting DEAD_TUPLES_THRESHOLD'
,
:postgresql
do
worker
=
described_class
.
new
threshold_query_result
=
[{
"n_dead_tup"
=>
described_class
::
DEAD_TUPLES_THRESHOLD
.
to_s
}]
normal_query_result
=
[{
"n_dead_tup"
=>
'3'
}]
allow
(
worker
)
.
to
receive
(
:execute_statement
)
.
with
(
/SELECT n_dead_tup */
)
.
and_return
(
threshold_query_result
,
normal_query_result
)
expect
(
worker
).
to
receive
(
:sleep
).
with
(
described_class
::
VACUUM_WAIT_TIME
).
once
worker
.
wait_deadtuple_vacuum
(
1
)
end
end
end
spec/lib/gitlab/background_migration/schedule_diff_files_deletion_spec.rb
deleted
100644 → 0
View file @
4455904b
require
'spec_helper'
describe
Gitlab
::
BackgroundMigration
::
ScheduleDiffFilesDeletion
,
:migration
,
schema:
20180619121030
do
describe
'#perform'
do
let
(
:merge_request_diffs
)
{
table
(
:merge_request_diffs
)
}
let
(
:diff_files
)
{
table
(
:merge_request_diff_files
)
}
let
(
:merge_requests
)
{
table
(
:merge_requests
)
}
let
(
:namespaces
)
{
table
(
:namespaces
)
}
let
(
:projects
)
{
table
(
:projects
)
}
def
diff_file_params
(
extra_params
=
{})
extra_params
.
merge
(
new_file:
false
,
renamed_file:
false
,
too_large:
false
,
deleted_file:
false
,
a_mode:
'foo'
,
b_mode:
'bar'
,
new_path:
'xpto'
,
old_path:
'kux'
,
diff:
'content'
)
end
def
create_diffs
(
id
:,
files_number
:,
state:
'collected'
)
merge_request_diffs
.
create!
(
id:
id
,
merge_request_id:
1
,
state:
state
)
files_number
.
times
.
to_a
.
each
do
|
index
|
params
=
diff_file_params
(
merge_request_diff_id:
id
,
relative_order:
index
)
diff_files
.
create!
(
params
)
end
end
before
do
stub_const
(
"
#{
described_class
.
name
}
::DELETION_BATCH"
,
10
)
namespaces
.
create!
(
id:
1
,
name:
'gitlab'
,
path:
'gitlab'
)
projects
.
create!
(
id:
1
,
namespace_id:
1
,
name:
'gitlab'
,
path:
'gitlab'
)
merge_requests
.
create!
(
id:
1
,
target_project_id:
1
,
source_project_id:
1
,
target_branch:
'feature'
,
source_branch:
'master'
,
state:
'merged'
)
end
it
'correctly schedules diff file deletion workers'
do
Sidekiq
::
Testing
.
fake!
do
Timecop
.
freeze
do
create_diffs
(
id:
1
,
files_number:
25
)
create_diffs
(
id:
2
,
files_number:
11
)
create_diffs
(
id:
3
,
files_number:
4
,
state:
'without_files'
)
create_diffs
(
id:
4
,
files_number:
5
,
state:
'empty'
)
create_diffs
(
id:
5
,
files_number:
9
)
worker
=
described_class
.
new
expect
(
worker
).
to
receive
(
:log_days_to_process_all_jobs
).
with
(
1
.
second
+
2.5
.
minutes
+
1.1
.
minutes
+
0.9
.
minutes
)
worker
.
perform
expect
(
described_class
::
MIGRATION
).
to
be_scheduled_delayed_migration
(
1
.
second
,
1
)
expect
(
described_class
::
MIGRATION
).
to
be_scheduled_delayed_migration
(
1
.
second
+
2.5
.
minutes
,
2
)
expect
(
described_class
::
MIGRATION
).
to
be_scheduled_delayed_migration
(
1
.
second
+
2.5
.
minutes
+
1.1
.
minutes
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
3
)
end
end
end
end
describe
'#days_to_process_all_jobs'
do
it
'logs how many days it will take to run all jobs'
do
expect
(
Rails
).
to
receive_message_chain
(
:logger
,
:info
)
.
with
(
"Gitlab::BackgroundMigration::DeleteDiffFiles will take 3 days to be processed"
)
described_class
.
new
.
log_days_to_process_all_jobs
(
3
.
days
.
seconds
)
end
end
end
spec/migrations/enqueue_delete_diff_files_workers_spec.rb
View file @
19966e70
...
...
@@ -2,11 +2,11 @@ require 'spec_helper'
require
Rails
.
root
.
join
(
'db'
,
'post_migrate'
,
'20180619121030_enqueue_delete_diff_files_workers.rb'
)
describe
EnqueueDeleteDiffFilesWorkers
,
:migration
,
:sidekiq
do
it
'correctly schedules diff file
deletion scheduler
'
do
it
'correctly schedules diff file
s deletion
'
do
Sidekiq
::
Testing
.
fake!
do
expect
(
BackgroundMigrationWorker
)
.
to
receive
(
:perform_async
)
.
with
(
described_class
::
SCHEDULER
)
.
with
(
described_class
::
MIGRATION
)
.
and_call_original
migrate!
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment