Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
gitlab-ce
Commits
8c9e6920
Commit
8c9e6920
authored
Dec 06, 2018
by
Douglas Barbosa Alexandre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fill project_repositories for hashed storage
parent
5237a55d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
91 additions
and
147 deletions
+91
-147
db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
...te/20181130102132_backfill_hashed_project_repositories.rb
+3
-4
lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
...kground_migration/backfill_hashed_project_repositories.rb
+51
-41
spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
...nd_migration/backfill_hashed_project_repositories_spec.rb
+37
-102
No files found.
db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb
View file @
8c9e6920
# frozen_string_literal: true
class
BackfillHashedProjectRepositories
<
ActiveRecord
::
Migration
[
5.0
]
class
BackfillHashedProjectRepositories
<
ActiveRecord
::
Migration
[
4.2
]
include
Gitlab
::
Database
::
MigrationHelpers
DOWNTIME
=
false
BATCH_SIZE
=
1_000
DELAY_INTERVAL
=
1
.
minutes
DELAY_INTERVAL
=
5
.
minutes
MIGRATION
=
'BackfillHashedProjectRepositories'
disable_ddl_transaction!
...
...
@@ -21,7 +21,6 @@ class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
end
def
down
# Since there could have been existing rows before the migration
# do not remove anything
# no-op: since there could have been existing rows before the migration do not remove anything
end
end
lib/gitlab/background_migration/backfill_hashed_project_repositories.rb
View file @
8c9e6920
...
...
@@ -2,58 +2,60 @@
module
Gitlab
module
BackgroundMigration
# Class the will create rows in project_repositories for all
# projects that are on hashed storage
# Class that will create fill the project_repositories table
# for all projects that are on hashed storage and an entry is
# is missing in this table.
class
BackfillHashedProjectRepositories
#
Model for a Shard
#
Shard model
class
Shard
<
ActiveRecord
::
Base
self
.
table_name
=
'shards'
def
self
.
by_name
(
name
)
to_a
.
detect
{
|
shard
|
shard
.
name
==
name
}
||
create_by
(
name:
name
)
rescue
ActiveRecord
::
RecordNotUnique
retry
end
end
# Class that will find or create the shard by name.
# There is only a small set of shards, which would not change quickly,
# so look them up from memory instead of hitting the DB each time.
# There is only a small set of shards, which would
# not change quickly, so look them up from memory
# instead of hitting the DB each time.
class
ShardFinder
def
find
(
name
)
shards
.
detect
{
|
shard
|
shard
.
name
==
name
}
||
create!
(
name
)
def
find_shard_id
(
name
)
shard_id
=
shards
.
fetch
(
name
,
nil
)
return
shard_id
if
shard_id
.
present?
Shard
.
transaction
(
requires_new:
true
)
do
create!
(
name
)
end
rescue
ActiveRecord
::
RecordNotUnique
load
!
re
load!
retry
end
private
def
create!
(
name
)
Shard
.
create!
(
name:
name
).
tap
{
|
shard
|
@shards
<<
shar
d
}
Shard
.
create!
(
name:
name
).
tap
{
|
shard
|
@shards
[
name
]
=
shard
.
i
d
}
end
def
shards
@shards
||
load
!
@shards
||
=
re
load!
end
def
load!
@shards
=
Shard
.
all
.
to_a
def
re
load!
@shards
=
Hash
[
*
Shard
.
all
.
map
{
|
shard
|
[
shard
.
name
,
shard
.
id
]
}.
flatten
]
end
end
#
Model for a ProjectRepository
#
ProjectRegistry model
class
ProjectRepository
<
ActiveRecord
::
Base
self
.
table_name
=
'project_repositories'
belongs_to
:project
,
inverse_of: :project_repository
end
#
Model for a Project
#
Project model
class
Project
<
ActiveRecord
::
Base
self
.
table_name
=
'projects'
HASHED_PATH_PREFIX
=
'@hashed'
HASHED_STORAGE_FEATURES
=
{
repository:
1
,
attachments:
2
...
...
@@ -63,33 +65,26 @@ module Gitlab
class
<<
self
def
on_hashed_storage
where
(
arel_table
[
:storage_version
].
gteq
(
HASHED_STORAGE_FEATURES
[
:repository
]))
where
(
Project
.
arel_table
[
:storage_version
]
.
gteq
(
HASHED_STORAGE_FEATURES
[
:repository
]))
end
def
without_project_repository
cond
=
ProjectRepository
.
arel_table
[
:project_id
].
eq
(
nil
)
left_outer_joins
(
:project_repository
).
where
(
cond
)
joins
(
left_outer_join_project_repository
)
.
where
(
ProjectRepository
.
arel_table
[
:project_id
].
eq
(
nil
)
)
end
def
left_outer_joins
(
relation
)
return
super
if
Gitlab
.
rails5?
def
left_outer_join_project_repository
projects_table
=
Project
.
arel_table
repository_table
=
ProjectRepository
.
arel_table
# TODO Rails 4?
projects_table
.
join
(
repository_table
,
Arel
::
Nodes
::
OuterJoin
)
.
on
(
projects_table
[
:id
].
eq
(
repository_table
[
:project_id
]))
.
join_sources
end
end
def
project_repository_attributes
(
shard_finder
)
return
unless
hashed_storage?
{
project_id:
id
,
shard_id:
shard_finder
.
find
(
repository_storage
).
id
,
disk_path:
hashed_disk_path
}
end
private
def
hashed_storage?
self
.
storage_version
&&
self
.
storage_version
>=
1
end
...
...
@@ -99,7 +94,7 @@ module Gitlab
end
def
disk_hash
@disk_hash
||=
Digest
::
SHA2
.
hexdigest
(
id
.
to_s
)
if
id
@disk_hash
||=
Digest
::
SHA2
.
hexdigest
(
id
.
to_s
)
end
end
...
...
@@ -110,12 +105,27 @@ module Gitlab
private
def
project_repositories
(
start_id
,
stop_id
)
Project
.
on_hashed_storage
.
without_project_repository
Project
.
on_hashed_storage
.
without_project_repository
.
where
(
id:
start_id
..
stop_id
)
.
map
{
|
project
|
project
.
project_repository_attributes
(
shard_finder
)
}
.
map
{
|
project
|
build_attributes_for_project
(
project
)
}
.
compact
end
def
build_attributes_for_project
(
project
)
return
unless
project
.
hashed_storage?
{
project_id:
project
.
id
,
shard_id:
find_shard_id
(
project
.
repository_storage
),
disk_path:
project
.
hashed_disk_path
}
end
def
find_shard_id
(
repository_storage
)
shard_finder
.
find_shard_id
(
repository_storage
)
end
def
shard_finder
@shard_finder
||=
ShardFinder
.
new
end
...
...
spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb
View file @
8c9e6920
...
...
@@ -3,59 +3,31 @@
require
'spec_helper'
describe
Gitlab
::
BackgroundMigration
::
BackfillHashedProjectRepositories
,
:migration
,
schema:
20181130102132
do
let
(
:shards
)
{
table
(
:shards
)
}
let
(
:namespaces
)
{
table
(
:namespaces
)
}
let
(
:projects
)
{
table
(
:projects
)
}
let
(
:project_repositories
)
{
table
(
:project_repositories
)
}
let
(
:projects
)
{
table
(
:projects
)
}
let
(
:shards
)
{
table
(
:shards
)
}
let
(
:group
)
{
namespaces
.
create!
(
name:
'foo'
,
path:
'foo'
)
}
let
(
:
default_
shard
)
{
shards
.
create!
(
name:
'default'
)
}
let
(
:shard
)
{
shards
.
create!
(
name:
'default'
)
}
describe
described_class
::
ShardFinder
do
describe
'#find'
do
subject
(
:finder
)
{
described_class
.
new
}
it
'creates the shard by name'
do
expect
(
finder
).
to
receive
(
:create!
).
and_call_original
expect
(
finder
.
find
(
'default'
)).
to
be_present
end
it
'does not try to create existing shards'
do
shards
.
create
(
name:
'default'
)
expect
(
finder
).
not_to
receive
(
:create!
)
finder
.
find
(
'default'
)
end
it
'only queries the database once for shards'
do
finder
.
find
(
'default'
)
expect
do
finder
.
find
(
'default'
)
end
.
not_to
exceed_query_limit
(
0
)
end
describe
'#find_shard_id'
do
it
'creates a new shard when it does not exist yet'
do
expect
do
finder
.
find
(
'other'
)
end
.
to
change
(
shards
,
:count
).
by
(
1
)
expect
{
subject
.
find_shard_id
(
'other'
)
}.
to
change
(
shards
,
:count
).
by
(
1
)
end
it
'
only creates a new shard once
'
do
finder
.
find
(
'other'
)
it
'
returns the shard when it exists
'
do
shards
.
create
(
id:
5
,
name:
'other'
)
expect
do
finder
.
find
(
'other'
)
end
.
not_to
change
(
shards
,
:count
)
end
shard_id
=
subject
.
find_shard_id
(
'other'
)
it
'is not vulnerable to race conditions'
do
finder
.
find
(
'default'
)
expect
(
shard_id
).
to
eq
(
5
)
end
other_shard
=
shards
.
create
(
name:
'other'
)
it
'only queries the database once to retrieve shards'
do
subject
.
find_shard_id
(
'default'
)
expect
(
finder
.
find
(
'other'
).
id
).
to
eq
(
other_shard
.
id
)
expect
{
subject
.
find_shard_id
(
'default'
)
}.
not_to
exceed_query_limit
(
0
)
end
end
end
...
...
@@ -63,93 +35,56 @@ describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migrat
describe
described_class
::
Project
do
describe
'.on_hashed_storage'
do
it
'finds projects with repository on hashed storage'
do
hashed_projects
=
[
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
),
projects
.
create!
(
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
,
storage_version:
2
)
]
projects
.
create!
(
id:
1
,
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
)
projects
.
create!
(
id:
2
,
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
,
storage_version:
2
)
projects
.
create!
(
id:
3
,
name:
'baz'
,
path:
'baz'
,
namespace_id:
group
.
id
,
storage_version:
0
)
projects
.
create!
(
id:
4
,
name:
'zoo'
,
path:
'zoo'
,
namespace_id:
group
.
id
,
storage_version:
nil
)
projects
.
create!
(
name:
'baz'
,
path:
'baz'
,
namespace_id:
group
.
id
,
storage_version:
0
)
projects
.
create!
(
name:
'quz'
,
path:
'quz'
,
namespace_id:
group
.
id
,
storage_version:
nil
)
expect
(
described_class
.
on_hashed_storage
.
pluck
(
:id
)).
to
match_array
(
hashed_projects
.
map
(
&
:id
))
expect
(
described_class
.
on_hashed_storage
.
pluck
(
:id
)).
to
match_array
([
1
,
2
])
end
end
describe
'.without_project_repository'
do
it
'finds projects which do not have a projects_repositories
row
'
do
without_project
=
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
)
with_project
=
projects
.
create!
(
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
)
project_repositories
.
create!
(
project_id:
with_project
.
id
,
disk_path:
'@phony/foo/bar'
,
shard_id:
default_
shard
.
id
)
it
'finds projects which do not have a projects_repositories
entry
'
do
projects
.
create!
(
id:
1
,
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
)
projects
.
create!
(
id:
2
,
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
)
project_repositories
.
create!
(
project_id:
2
,
disk_path:
'@phony/foo/bar'
,
shard_id:
shard
.
id
)
expect
(
described_class
.
without_project_repository
.
pluck
(
:id
)).
to
contain_exactly
(
without_project
.
id
)
end
end
describe
'#project_repository_attributes'
do
let
(
:shard_finder
)
{
Gitlab
::
BackgroundMigration
::
BackfillHashedProjectRepositories
::
ShardFinder
.
new
}
it
'composes the correct attributes for project_repository'
do
shiny_shard
=
shards
.
create!
(
name:
'shiny'
)
project
=
projects
.
create!
(
id:
5
,
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
repository_storage:
shiny_shard
.
name
,
storage_version:
1
)
expected_attributes
=
{
project_id:
project
.
id
,
shard_id:
shiny_shard
.
id
,
disk_path:
'@hashed/ef/2d/ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d'
}
expect
(
described_class
.
find
(
project
.
id
).
project_repository_attributes
(
shard_finder
)).
to
eq
(
expected_attributes
)
end
it
'returns nil for a project not on hashed storage'
do
project
=
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
0
)
expect
(
described_class
.
find
(
project
.
id
).
project_repository_attributes
(
shard_finder
)).
to
be_nil
expect
(
described_class
.
without_project_repository
.
pluck
(
:id
)).
to
contain_exactly
(
1
)
end
end
end
describe
'#perform'
do
def
perform!
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
end
it
'create project_repository row for hashed storage project'
do
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
)
it
'creates a project_repository row for projects on hashed storage that need one'
do
projects
.
create!
(
id:
1
,
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
)
projects
.
create!
(
id:
2
,
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
,
storage_version:
2
)
expect
do
perform!
end
.
to
change
(
project_repositories
,
:count
).
by
(
1
)
expect
{
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
}.
to
change
(
project_repositories
,
:count
).
by
(
2
)
end
it
'does nothing for projects
that have already a project_repository
'
do
project
=
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
)
project_repositories
.
create!
(
project_id:
project
.
id
,
disk_path:
'@phony/foo/bar'
,
shard_id:
default_
shard
.
id
)
it
'does nothing for projects
on hashed storage that have already a project_repository row
'
do
project
s
.
create!
(
id:
1
,
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
)
project_repositories
.
create!
(
project_id:
1
,
disk_path:
'@phony/foo/bar'
,
shard_id:
shard
.
id
)
expect
do
perform!
end
.
not_to
change
(
project_repositories
,
:count
)
expect
{
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
}.
not_to
change
(
project_repositories
,
:count
)
end
it
'does nothing for projects on legacy storage'
do
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
0
)
expect
do
perform!
end
.
not_to
change
(
project_repositories
,
:count
)
expect
{
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
}.
not_to
change
(
project_repositories
,
:count
)
end
it
'inserts rows in a single query'
do
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
default_
shard
.
name
)
projects
.
create!
(
name:
'foo'
,
path:
'foo'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
shard
.
name
)
control_count
=
ActiveRecord
::
QueryRecorder
.
new
do
perform!
end
control_count
=
ActiveRecord
::
QueryRecorder
.
new
{
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
}
projects
.
create!
(
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
default_
shard
.
name
)
projects
.
create!
(
name:
'
quz'
,
path:
'quz'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
default_
shard
.
name
)
projects
.
create!
(
name:
'bar'
,
path:
'bar'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
shard
.
name
)
projects
.
create!
(
name:
'
zoo'
,
path:
'zoo'
,
namespace_id:
group
.
id
,
storage_version:
1
,
repository_storage:
shard
.
name
)
expect
{
perform!
}.
not_to
exceed_query_limit
(
control_count
)
expect
{
described_class
.
new
.
perform
(
1
,
projects
.
last
.
id
)
}.
not_to
exceed_query_limit
(
control_count
)
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment