Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
474fd913
Commit
474fd913
authored
Nov 23, 2018
by
Andreas Brandl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move strategies in their own files
This improves readability quite a bit.
parent
ff35cb45
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
323 additions
and
279 deletions
+323
-279
lib/gitlab/database/count.rb
lib/gitlab/database/count.rb
+0
-155
lib/gitlab/database/count/exact_count_strategy.rb
lib/gitlab/database/count/exact_count_strategy.rb
+31
-0
lib/gitlab/database/count/reltuples_count_strategy.rb
lib/gitlab/database/count/reltuples_count_strategy.rb
+79
-0
lib/gitlab/database/count/tablesample_count_strategy.rb
lib/gitlab/database/count/tablesample_count_strategy.rb
+66
-0
spec/lib/gitlab/database/count/exact_count_strategy_spec.rb
spec/lib/gitlab/database/count/exact_count_strategy_spec.rb
+34
-0
spec/lib/gitlab/database/count/reltuples_count_strategy_spec.rb
...ib/gitlab/database/count/reltuples_count_strategy_spec.rb
+48
-0
spec/lib/gitlab/database/count/tablesample_count_strategy_spec.rb
.../gitlab/database/count/tablesample_count_strategy_spec.rb
+65
-0
spec/lib/gitlab/database/count_spec.rb
spec/lib/gitlab/database/count_spec.rb
+0
-124
No files found.
lib/gitlab/database/count.rb
View file @
474fd913
...
@@ -50,161 +50,6 @@ module Gitlab
...
@@ -50,161 +50,6 @@ module Gitlab
end
end
end
end
end
end
# This strategy performs an exact count on the model.
#
# This is guaranteed to be accurate, however it also scans the
# whole table. Hence, there are no guarantees with respect
# to runtime.
#
# Note that for very large tables, this may even timeout.
class
ExactCountStrategy
attr_reader
:models
def
initialize
(
models
)
@models
=
models
end
def
count
models
.
each_with_object
({})
do
|
model
,
data
|
data
[
model
]
=
model
.
count
end
end
def
self
.
enabled?
true
end
end
class
PgClass
<
ActiveRecord
::
Base
self
.
table_name
=
'pg_class'
end
# This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables.
#
# Specifically, it relies on the column reltuples in said table. An additional
# check is performed to make sure statistics were updated within the last hour.
#
# Otherwise, this strategy skips tables with outdated statistics.
#
# There are no guarantees with respect to the accuracy of this strategy. Runtime
# however is guaranteed to be "fast", because it only looks up statistics.
class
ReltuplesCountStrategy
attr_reader
:models
def
initialize
(
models
)
@models
=
models
end
# Returns a hash of the table names that have recently updated tuples.
#
# @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
def
count
size_estimates
rescue
*
CONNECTION_ERRORS
{}
end
def
self
.
enabled?
Gitlab
::
Database
.
postgresql?
end
private
def
table_names
models
.
map
(
&
:table_name
)
end
def
size_estimates
(
check_statistics:
true
)
table_to_model
=
models
.
each_with_object
({})
{
|
model
,
h
|
h
[
model
.
table_name
]
=
model
}
# Querying tuple stats only works on the primary. Due to load balancing, the
# easiest way to do this is to start a transaction.
ActiveRecord
::
Base
.
transaction
do
get_statistics
(
table_names
,
check_statistics:
check_statistics
).
each_with_object
({})
do
|
row
,
data
|
model
=
table_to_model
[
row
.
table_name
]
data
[
model
]
=
row
.
estimate
end
end
end
# Generates the PostgreSQL query to return the tuples for tables
# that have been vacuumed or analyzed in the last hour.
#
# @param [Array] table names
# @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
def
get_statistics
(
table_names
,
check_statistics:
true
)
time
=
"to_timestamp(
#{
1
.
hour
.
ago
.
to_i
}
)"
query
=
PgClass
.
joins
(
"LEFT JOIN pg_stat_user_tables USING (relname)"
)
.
where
(
relname:
table_names
)
.
select
(
'pg_class.relname AS table_name, reltuples::bigint AS estimate'
)
if
check_statistics
query
=
query
.
where
(
'last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?'
,
time
,
time
,
time
,
time
)
end
query
end
end
# A tablesample count executes in two phases:
# * Estimate table sizes based on reltuples.
# * Based on the estimate:
# * If the table is considered 'small', execute an exact relation count.
# * Otherwise, count on a sample of the table using TABLESAMPLE.
#
# The size of the sample is chosen in a way that we always roughly scan
# the same amount of rows (see TABLESAMPLE_ROW_TARGET).
#
# There are no guarantees with respect to the accuracy of the result or runtime.
class
TablesampleCountStrategy
<
ReltuplesCountStrategy
EXACT_COUNT_THRESHOLD
=
100_000
TABLESAMPLE_ROW_TARGET
=
100_000
def
count
estimates
=
size_estimates
(
check_statistics:
false
)
models
.
each_with_object
({})
do
|
model
,
count_by_model
|
count
=
perform_count
(
model
,
estimates
[
model
])
count_by_model
[
model
]
=
count
if
count
end
rescue
*
CONNECTION_ERRORS
{}
end
def
self
.
enabled?
Gitlab
::
Database
.
postgresql?
&&
Feature
.
enabled?
(
:tablesample_counts
)
end
private
def
perform_count
(
model
,
estimate
)
# If we estimate 0, we may not have statistics at all. Don't use them.
return
nil
unless
estimate
&&
estimate
>
0
if
estimate
<
EXACT_COUNT_THRESHOLD
# The table is considered small, the assumption here is that
# the exact count will be fast anyways.
model
.
count
else
# The table is considered large, let's only count on a sample.
tablesample_count
(
model
,
estimate
)
end
end
def
tablesample_count
(
model
,
estimate
)
portion
=
(
TABLESAMPLE_ROW_TARGET
.
to_f
/
estimate
).
round
(
4
)
inverse
=
1
/
portion
query
=
<<~
SQL
SELECT (COUNT(*)*
#{
inverse
}
)::integer AS count
FROM
#{
model
.
table_name
}
TABLESAMPLE SYSTEM (
#{
portion
*
100
}
)
SQL
rows
=
ActiveRecord
::
Base
.
connection
.
select_all
(
query
)
Integer
(
rows
.
first
[
'count'
])
end
end
end
end
end
end
end
end
lib/gitlab/database/count/exact_count_strategy.rb
0 → 100644
View file @
474fd913
# frozen_string_literal: true
module
Gitlab
module
Database
module
Count
# This strategy performs an exact count on the model.
#
# This is guaranteed to be accurate, however it also scans the
# whole table. Hence, there are no guarantees with respect
# to runtime.
#
# Note that for very large tables, this may even timeout.
class
ExactCountStrategy
attr_reader
:models
def
initialize
(
models
)
@models
=
models
end
def
count
models
.
each_with_object
({})
do
|
model
,
data
|
data
[
model
]
=
model
.
count
end
end
def
self
.
enabled?
true
end
end
end
end
end
lib/gitlab/database/count/reltuples_count_strategy.rb
0 → 100644
View file @
474fd913
# frozen_string_literal: true
module
Gitlab
module
Database
module
Count
class
PgClass
<
ActiveRecord
::
Base
self
.
table_name
=
'pg_class'
end
# This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables.
#
# Specifically, it relies on the column reltuples in said table. An additional
# check is performed to make sure statistics were updated within the last hour.
#
# Otherwise, this strategy skips tables with outdated statistics.
#
# There are no guarantees with respect to the accuracy of this strategy. Runtime
# however is guaranteed to be "fast", because it only looks up statistics.
class
ReltuplesCountStrategy
attr_reader
:models
def
initialize
(
models
)
@models
=
models
end
# Returns a hash of the table names that have recently updated tuples.
#
# @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
def
count
size_estimates
rescue
*
CONNECTION_ERRORS
{}
end
def
self
.
enabled?
Gitlab
::
Database
.
postgresql?
end
private
def
table_names
models
.
map
(
&
:table_name
)
end
def
size_estimates
(
check_statistics:
true
)
table_to_model
=
models
.
each_with_object
({})
{
|
model
,
h
|
h
[
model
.
table_name
]
=
model
}
# Querying tuple stats only works on the primary. Due to load balancing, the
# easiest way to do this is to start a transaction.
ActiveRecord
::
Base
.
transaction
do
get_statistics
(
table_names
,
check_statistics:
check_statistics
).
each_with_object
({})
do
|
row
,
data
|
model
=
table_to_model
[
row
.
table_name
]
data
[
model
]
=
row
.
estimate
end
end
end
# Generates the PostgreSQL query to return the tuples for tables
# that have been vacuumed or analyzed in the last hour.
#
# @param [Array] table names
# @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
def
get_statistics
(
table_names
,
check_statistics:
true
)
time
=
1
.
hour
.
ago
query
=
PgClass
.
joins
(
"LEFT JOIN pg_stat_user_tables USING (relname)"
)
.
where
(
relname:
table_names
)
.
select
(
'pg_class.relname AS table_name, reltuples::bigint AS estimate'
)
if
check_statistics
query
=
query
.
where
(
'last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?'
,
time
,
time
,
time
,
time
)
end
query
end
end
end
end
end
lib/gitlab/database/count/tablesample_count_strategy.rb
0 → 100644
View file @
474fd913
# frozen_string_literal: true
module
Gitlab
module
Database
module
Count
# A tablesample count executes in two phases:
# * Estimate table sizes based on reltuples.
# * Based on the estimate:
# * If the table is considered 'small', execute an exact relation count.
# * Otherwise, count on a sample of the table using TABLESAMPLE.
#
# The size of the sample is chosen in a way that we always roughly scan
# the same amount of rows (see TABLESAMPLE_ROW_TARGET).
#
# There are no guarantees with respect to the accuracy of the result or runtime.
class
TablesampleCountStrategy
<
ReltuplesCountStrategy
EXACT_COUNT_THRESHOLD
=
100_000
TABLESAMPLE_ROW_TARGET
=
100_000
def
count
estimates
=
size_estimates
(
check_statistics:
false
)
models
.
each_with_object
({})
do
|
model
,
count_by_model
|
count
=
perform_count
(
model
,
estimates
[
model
])
count_by_model
[
model
]
=
count
if
count
end
rescue
*
CONNECTION_ERRORS
{}
end
def
self
.
enabled?
Gitlab
::
Database
.
postgresql?
&&
Feature
.
enabled?
(
:tablesample_counts
)
end
private
def
perform_count
(
model
,
estimate
)
# If we estimate 0, we may not have statistics at all. Don't use them.
return
nil
unless
estimate
&&
estimate
>
0
if
estimate
<
EXACT_COUNT_THRESHOLD
# The table is considered small, the assumption here is that
# the exact count will be fast anyways.
model
.
count
else
# The table is considered large, let's only count on a sample.
tablesample_count
(
model
,
estimate
)
end
end
def
tablesample_count
(
model
,
estimate
)
portion
=
(
TABLESAMPLE_ROW_TARGET
.
to_f
/
estimate
).
round
(
4
)
inverse
=
1
/
portion
query
=
<<~
SQL
SELECT (COUNT(*)*
#{
inverse
}
)::integer AS count
FROM
#{
model
.
table_name
}
TABLESAMPLE SYSTEM (
#{
portion
*
100
}
)
SQL
rows
=
ActiveRecord
::
Base
.
connection
.
select_all
(
query
)
Integer
(
rows
.
first
[
'count'
])
end
end
end
end
end
spec/lib/gitlab/database/count/exact_count_strategy_spec.rb
0 → 100644
View file @
474fd913
require
'spec_helper'
describe
Gitlab
::
Database
::
Count
::
ExactCountStrategy
do
before
do
create_list
(
:project
,
3
)
create
(
:identity
)
end
let
(
:models
)
{
[
Project
,
Identity
]
}
subject
{
described_class
.
new
(
models
).
count
}
describe
'#count'
do
it
'counts all models'
do
expect
(
models
).
to
all
(
receive
(
:count
).
and_call_original
)
expect
(
subject
).
to
eq
({
Project
=>
3
,
Identity
=>
1
})
end
end
describe
'.enabled?'
do
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is enabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
end
end
spec/lib/gitlab/database/count/reltuples_count_strategy_spec.rb
0 → 100644
View file @
474fd913
require
'spec_helper'
describe
Gitlab
::
Database
::
Count
::
ReltuplesCountStrategy
do
before
do
create_list
(
:project
,
3
)
create
(
:identity
)
end
let
(
:models
)
{
[
Project
,
Identity
]
}
subject
{
described_class
.
new
(
models
).
count
}
describe
'#count'
,
:postgresql
do
context
'when reltuples is up to date'
do
before
do
ActiveRecord
::
Base
.
connection
.
execute
(
'ANALYZE projects'
)
ActiveRecord
::
Base
.
connection
.
execute
(
'ANALYZE identities'
)
end
it
'uses statistics to do the count'
do
models
.
each
{
|
model
|
expect
(
model
).
not_to
receive
(
:count
)
}
expect
(
subject
).
to
eq
({
Project
=>
3
,
Identity
=>
1
})
end
end
context
'insufficient permissions'
do
it
'returns an empty hash'
do
allow
(
ActiveRecord
::
Base
).
to
receive
(
:transaction
).
and_raise
(
PG
::
InsufficientPrivilege
)
expect
(
subject
).
to
eq
({})
end
end
end
describe
'.enabled?'
do
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is disabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_falsey
end
end
end
spec/lib/gitlab/database/count/tablesample_count_strategy_spec.rb
0 → 100644
View file @
474fd913
require
'spec_helper'
describe
Gitlab
::
Database
::
Count
::
TablesampleCountStrategy
do
before
do
create_list
(
:project
,
3
)
create
(
:identity
)
end
let
(
:models
)
{
[
Project
,
Identity
]
}
let
(
:strategy
)
{
described_class
.
new
(
models
)
}
subject
{
strategy
.
count
}
describe
'#count'
,
:postgresql
do
let
(
:estimates
)
{
{
Project
=>
threshold
+
1
,
Identity
=>
threshold
-
1
}
}
let
(
:threshold
)
{
Gitlab
::
Database
::
Count
::
TablesampleCountStrategy
::
EXACT_COUNT_THRESHOLD
}
before
do
allow
(
strategy
).
to
receive
(
:size_estimates
).
with
(
check_statistics:
false
).
and_return
(
estimates
)
end
context
'for tables with an estimated small size'
do
it
'performs an exact count'
do
expect
(
Identity
).
to
receive
(
:count
).
and_call_original
expect
(
subject
).
to
include
({
Identity
=>
1
})
end
end
context
'for tables with an estimated large size'
do
it
'performs a tablesample count'
do
expect
(
Project
).
not_to
receive
(
:count
)
result
=
subject
expect
(
result
[
Project
]).
to
eq
(
3
)
end
end
context
'insufficient permissions'
do
it
'returns an empty hash'
do
allow
(
strategy
).
to
receive
(
:size_estimates
).
and_raise
(
PG
::
InsufficientPrivilege
)
expect
(
subject
).
to
eq
({})
end
end
end
describe
'.enabled?'
do
before
do
stub_feature_flags
(
tablesample_counts:
true
)
end
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is disabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_falsey
end
end
end
spec/lib/gitlab/database/count_spec.rb
View file @
474fd913
...
@@ -56,128 +56,4 @@ describe Gitlab::Database::Count do
...
@@ -56,128 +56,4 @@ describe Gitlab::Database::Count do
end
end
end
end
end
end
describe
Gitlab
::
Database
::
Count
::
ExactCountStrategy
do
subject
{
described_class
.
new
(
models
).
count
}
describe
'#count'
do
it
'counts all models'
do
expect
(
models
).
to
all
(
receive
(
:count
).
and_call_original
)
expect
(
subject
).
to
eq
({
Project
=>
3
,
Identity
=>
1
})
end
end
describe
'.enabled?'
do
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is enabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
end
end
describe
Gitlab
::
Database
::
Count
::
ReltuplesCountStrategy
do
subject
{
described_class
.
new
(
models
).
count
}
describe
'#count'
,
:postgresql
do
context
'when reltuples is up to date'
do
before
do
ActiveRecord
::
Base
.
connection
.
execute
(
'ANALYZE projects'
)
ActiveRecord
::
Base
.
connection
.
execute
(
'ANALYZE identities'
)
end
it
'uses statistics to do the count'
do
models
.
each
{
|
model
|
expect
(
model
).
not_to
receive
(
:count
)
}
expect
(
subject
).
to
eq
({
Project
=>
3
,
Identity
=>
1
})
end
end
context
'insufficient permissions'
do
it
'returns an empty hash'
do
allow
(
ActiveRecord
::
Base
).
to
receive
(
:transaction
).
and_raise
(
PG
::
InsufficientPrivilege
)
expect
(
subject
).
to
eq
({})
end
end
end
describe
'.enabled?'
do
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is disabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_falsey
end
end
end
describe
Gitlab
::
Database
::
Count
::
TablesampleCountStrategy
do
subject
{
strategy
.
count
}
let
(
:strategy
)
{
described_class
.
new
(
models
)
}
describe
'#count'
,
:postgresql
do
let
(
:estimates
)
{
{
Project
=>
threshold
+
1
,
Identity
=>
threshold
-
1
}
}
let
(
:threshold
)
{
Gitlab
::
Database
::
Count
::
TablesampleCountStrategy
::
EXACT_COUNT_THRESHOLD
}
before
do
allow
(
strategy
).
to
receive
(
:size_estimates
).
with
(
check_statistics:
false
).
and_return
(
estimates
)
end
context
'for tables with an estimated small size'
do
it
'performs an exact count'
do
expect
(
Identity
).
to
receive
(
:count
).
and_call_original
expect
(
subject
).
to
include
({
Identity
=>
1
})
end
end
context
'for tables with an estimated large size'
do
it
'performs a tablesample count'
do
expect
(
Project
).
not_to
receive
(
:count
)
result
=
subject
expect
(
result
[
Project
]).
to
eq
(
3
)
end
end
context
'insufficient permissions'
do
it
'returns an empty hash'
do
allow
(
strategy
).
to
receive
(
:size_estimates
).
and_raise
(
PG
::
InsufficientPrivilege
)
expect
(
subject
).
to
eq
({})
end
end
end
describe
'.enabled?'
do
before
do
stub_feature_flags
(
tablesample_counts:
true
)
end
it
'is enabled for PostgreSQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
true
)
expect
(
described_class
.
enabled?
).
to
be_truthy
end
it
'is disabled for MySQL'
do
allow
(
Gitlab
::
Database
).
to
receive
(
:postgresql?
).
and_return
(
false
)
expect
(
described_class
.
enabled?
).
to
be_falsey
end
end
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment