Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
bb5b266b
Commit
bb5b266b
authored
Sep 02, 2019
by
Douglas Barbosa Alexandre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Extract Geo::Secondary::BackfillWorker concern
parent
018a2d2c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
134 additions
and
89 deletions
+134
-89
ee/app/workers/concerns/geo/secondary/backfill_worker.rb
ee/app/workers/concerns/geo/secondary/backfill_worker.rb
+89
-0
ee/app/workers/geo/secondary/repository_backfill_worker.rb
ee/app/workers/geo/secondary/repository_backfill_worker.rb
+32
-86
ee/spec/workers/geo/secondary/repository_backfill_worker_spec.rb
.../workers/geo/secondary/repository_backfill_worker_spec.rb
+13
-3
No files found.
ee/app/workers/concerns/geo/secondary/backfill_worker.rb
0 → 100644
View file @
bb5b266b
# frozen_string_literal: true

module Geo
  module Secondary
    # Shared behavior for Geo secondary-node backfill workers.
    #
    # Including workers get the standard worker mixins (ApplicationWorker,
    # ExclusiveLeaseGuard, GeoQueue, Geo log helpers, StrongMemoize), the
    # lease key/timeout used to guarantee a single running instance per
    # shard, and common helpers for checking node health and run duration.
    #
    # NOTE(review): includers appear to be expected to set @shard_name,
    # @start_time and @loops themselves (only attr_readers are declared
    # here) and to override #over_capacity? — confirm against the workers
    # that include this concern.
    module BackfillWorker
      extend ActiveSupport::Concern

      # Maximum time the exclusive lease is held (see #lease_timeout below).
      LEASE_TIMEOUT = 60.minutes

      # Maximum wall-clock run duration in seconds; compared against
      # elapsed time in #over_time?.
      RUN_TIME = 60.minutes.to_i

      included do
        # prepend (not include) so ShadowMethods#lease_key/#lease_timeout
        # take precedence over same-named methods defined by the mixins
        # below or by the including class itself.
        prepend ShadowMethods

        include ApplicationWorker
        include ExclusiveLeaseGuard
        include GeoQueue
        include ::Gitlab::Geo::LogHelpers
        include ::Gitlab::Utils::StrongMemoize

        # Read-only accessors; the including worker is responsible for
        # assigning the underlying instance variables.
        attr_reader :shard_name, :start_time, :loops
      end

      # Lease-related overrides that must win the method-lookup race
      # against ExclusiveLeaseGuard defaults (hence the prepend above).
      module ShadowMethods
        # One lease per worker class per shard, e.g.
        # "geo/secondary/repository_backfill_worker:shard:default".
        def lease_key
          @lease_key ||= "#{self.class.name.underscore}:shard:#{shard_name}"
        end

        def lease_timeout
          LEASE_TIMEOUT
        end
      end

      private

      # Enrich every log line produced via LogHelpers with the shard name.
      def base_log_data(message)
        super(message).merge(worker_metadata)
      end

      # True only when this node can actually perform backfill work:
      # the Geo tracking database is configured, the node is a secondary,
      # and the target shard is reported healthy. Logs the first failing
      # condition and returns false otherwise.
      def healthy_node?
        unless Gitlab::Geo.geo_database_configured?
          log_info('Geo database not configured')
          return false
        end

        unless Gitlab::Geo.secondary?
          log_info('Current node not a secondary')
          return false
        end

        unless Gitlab::ShardHealthCache.healthy_shard?(shard_name)
          log_info("Shard (#{shard_name}) is not healthy")
          return false
        end

        true
      end

      # rubocop:disable Gitlab/ModuleWithInstanceVariables
      # Whether the current Geo node is enabled. The DB-backed answer is
      # memoized and refreshed at most once per minute.
      def node_enabled?
        # Only check every minute to avoid polling the DB excessively
        unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago
          @last_enabled_check = Time.now.utc
          clear_memoization(:current_node_enabled)
        end

        strong_memoize(:current_node_enabled) do
          Gitlab::Geo.current_node_enabled?
        end
      end
      # rubocop:enable Gitlab/ModuleWithInstanceVariables

      def run_time
        RUN_TIME
      end

      # Default: never over capacity. Presumably overridden by including
      # workers that track scheduled jobs — TODO confirm in includers.
      def over_capacity?
        false
      end

      # True once the elapsed time since #start_time reaches #run_time.
      def over_time?
        (Time.now.utc - start_time) >= run_time
      end

      # Structured-log metadata merged into every log entry.
      def worker_metadata
        { shard: shard_name }
      end
    end
  end
end
ee/app/workers/geo/secondary/repository_backfill_worker.rb
View file @
bb5b266b
...
...
@@ -3,14 +3,7 @@
module
Geo
module
Secondary
class
RepositoryBackfillWorker
include
ApplicationWorker
include
ExclusiveLeaseGuard
include
GeoQueue
include
::
Gitlab
::
Geo
::
LogHelpers
include
::
Gitlab
::
Utils
::
StrongMemoize
LEASE_TIMEOUT
=
60
.
minutes
RUN_TIME
=
60
.
minutes
.
to_i
include
Geo
::
Secondary
::
BackfillWorker
def
initialize
@scheduled_jobs
=
[]
...
...
@@ -21,20 +14,7 @@ module Geo
@start_time
=
Time
.
now
.
utc
@loops
=
0
unless
Gitlab
::
Geo
.
geo_database_configured?
log_info
(
'Geo database not configured'
)
return
end
unless
Gitlab
::
Geo
.
secondary?
log_info
(
'Current node not a secondary'
)
return
end
unless
Gitlab
::
ShardHealthCache
.
healthy_shard?
(
shard_name
)
log_info
(
"Shard (
#{
shard_name
}
) is not healthy"
)
return
end
return
unless
healthy_node?
try_obtain_lease
do
log_info
(
'Repository backfilling started'
)
...
...
@@ -53,15 +33,15 @@ module Geo
# This will stream the results one by one
# until there are no more results to fetch.
result
=
connection
.
get_result
or
break
result
=
connection
.
get_result
break
:complete
if
result
.
nil?
result
.
check
result
.
each
do
|
row
|
schedule_job
(
row
[
'id'
])
end
if
over_capacity?
sleep
(
1
)
end
sleep
(
1
)
if
over_capacity?
end
rescue
=>
error
reason
=
:error
...
...
@@ -75,31 +55,32 @@ module Geo
private
attr_reader
:s
hard_name
,
:start_time
,
:loops
,
:s
cheduled_jobs
attr_reader
:scheduled_jobs
def
base_log_data
(
message
)
s
uper
(
message
).
merge
(
worker_metadata
)
def
connection
s
trong_memoize
(
:connection
)
{
Geo
::
TrackingBase
.
connection
.
raw_connection
}
end
def
lease_key
@lease_key
||=
"
#{
self
.
class
.
name
.
underscore
}
:shard:
#{
shard_name
}
"
end
def
max_capacity
# If we don't have a count, that means that for some reason
# RepositorySyncWorker stopped running/updating the cache. We might
# be trying to shut down Geo while this job may still be running.
healthy_count
=
healthy_shard_count
return
0
unless
healthy_count
>
0
capacity_per_shard
=
Gitlab
::
Geo
.
current_node
.
repos_max_capacity
/
healthy_count
def
lease_timeout
LEASE_TIMEOUT
[
1
,
capacity_per_shard
.
to_i
].
max
end
def
worker_metadata
{
shard:
shard_name
}
def
healthy_shard_count
Gitlab
::
ShardHealthCache
.
healthy_shard_count
.
to_i
end
def
connection
strong_memoize
(
:connection
)
do
Geo
::
TrackingBase
.
connection
.
raw_connection
end
def
over_capacity?
scheduled_jobs
.
size
>=
max_capacity
end
# rubocop: disable CodeReuse/ActiveRecord
def
schedule_job
(
project_id
)
job_id
=
Geo
::
ProjectSyncWorker
.
perform_async
(
project_id
,
sync_repository:
true
,
sync_wiki:
true
)
...
...
@@ -110,20 +91,8 @@ module Geo
log_info
(
"Repository could not be scheduled for backfilling"
,
project_id:
project_id
)
end
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def
unsynced_projects_ids
Geo
::
ProjectUnsyncedFinder
.
new
(
current_node:
Gitlab
::
Geo
.
current_node
,
shard_name:
shard_name
)
.
execute
.
reorder
(
last_repository_updated_at: :desc
)
.
select
(
:id
)
end
# rubocop: enable CodeReuse/ActiveRecord
def
update_jobs_in_progress
scheduled_job_ids
=
scheduled_jobs
.
map
{
|
data
|
data
[
:job_id
]
}
status
=
Gitlab
::
SidekiqStatus
.
job_status
(
scheduled_job_ids
)
# SidekiqStatus returns an array of booleans: true if the job is still running, false otherwise.
...
...
@@ -132,42 +101,19 @@ module Geo
@scheduled_jobs
=
@scheduled_jobs
.
zip
(
status
).
map
{
|
(
job
,
running
)
|
job
if
running
}.
compact
end
def
max_capacity
healthy_count
=
Gitlab
::
ShardHealthCache
.
healthy_shard_count
# If we don't have a count, that means that for some reason
# RepositorySyncWorker stopped running/updating the cache. We might
# be trying to shut down Geo while this job may still be running.
return
0
unless
healthy_count
.
to_i
>
0
capacity_per_shard
=
Gitlab
::
Geo
.
current_node
.
repos_max_capacity
/
healthy_count
[
1
,
capacity_per_shard
.
to_i
].
max
end
def
node_enabled?
# Only check every minute to avoid polling the DB excessively
unless
@last_enabled_check
.
present?
&&
@last_enabled_check
>
1
.
minute
.
ago
@last_enabled_check
=
Time
.
now
clear_memoization
(
:current_node_enabled
)
end
strong_memoize
(
:current_node_enabled
)
do
Gitlab
::
Geo
.
current_node_enabled?
end
end
def
run_time
RUN_TIME
end
def
over_capacity?
scheduled_jobs
.
size
>=
max_capacity
def
scheduled_job_ids
scheduled_jobs
.
map
{
|
data
|
data
[
:job_id
]
}
end
def
over_time?
(
Time
.
now
.
utc
-
start_time
)
>=
run_time
# rubocop: disable CodeReuse/ActiveRecord
def
unsynced_projects_ids
Geo
::
ProjectUnsyncedFinder
.
new
(
current_node:
Gitlab
::
Geo
.
current_node
,
shard_name:
shard_name
)
.
execute
.
reorder
(
last_repository_updated_at: :desc
)
.
select
(
:id
)
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
end
ee/spec/workers/geo/secondary/repository_backfill_worker_spec.rb
View file @
bb5b266b
...
...
@@ -6,12 +6,12 @@ describe Geo::Secondary::RepositoryBackfillWorker, :geo, :geo_fdw, :clean_gitlab
include
EE
::
GeoHelpers
let
(
:primary
)
{
create
(
:geo_node
,
:primary
)
}
let
(
:secondary
)
{
create
(
:geo_node
)
}
let
(
:secondary
)
{
create
(
:geo_node
,
repos_max_capacity:
5
)
}
let
(
:shard_name
)
{
Gitlab
.
config
.
repositories
.
storages
.
keys
.
first
}
before
do
stub_current_geo_node
(
secondary
)
stub_
shard_healthy
(
shard_name
,
tru
e
)
stub_
healthy_shards
(
shard_nam
e
)
end
describe
'#perform'
do
...
...
@@ -35,7 +35,7 @@ describe Geo::Secondary::RepositoryBackfillWorker, :geo, :geo_fdw, :clean_gitlab
end
it
'does not schedule jobs when shard is not healthy'
do
stub_
shard_healthy
(
shard_name
,
false
)
stub_
healthy_shards
([]
)
create
(
:project
)
expect
(
Geo
::
ProjectSyncWorker
).
not_to
receive
(
:perform_async
)
...
...
@@ -68,5 +68,15 @@ describe Geo::Secondary::RepositoryBackfillWorker, :geo, :geo_fdw, :clean_gitlab
subject
.
perform
(
shard_name
)
end
it
'respects Geo secondary node max capacity per shard'
do
stub_healthy_shards
([
shard_name
,
'shard2'
,
'shard3'
,
'shard4'
,
'shard5'
])
allow
(
Geo
::
ProjectSyncWorker
).
to
receive
(
:perform_async
).
twice
.
and_return
(
'jid-123'
)
create_list
(
:project
,
2
)
expect
(
subject
).
to
receive
(
:sleep
).
twice
.
and_call_original
subject
.
perform
(
shard_name
)
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment