Commit 63620ec7 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch 'alerts-for-built-in-metrics' into 'master'

Import common metrics into database.

Closes gitlab-ee#6948

See merge request gitlab-org/gitlab-ce!21459
parents 984f9ebd 915306ec
...@@ -232,6 +232,8 @@ class Project < ActiveRecord::Base ...@@ -232,6 +232,8 @@ class Project < ActiveRecord::Base
has_many :clusters, through: :cluster_project, class_name: 'Clusters::Cluster' has_many :clusters, through: :cluster_project, class_name: 'Clusters::Cluster'
has_many :cluster_ingresses, through: :clusters, source: :application_ingress, class_name: 'Clusters::Applications::Ingress' has_many :cluster_ingresses, through: :clusters, source: :application_ingress, class_name: 'Clusters::Applications::Ingress'
has_many :prometheus_metrics
# Container repositories need to remove data from the container registry, # Container repositories need to remove data from the container registry,
# which is not managed by the DB. Hence we're still using dependent: :destroy # which is not managed by the DB. Hence we're still using dependent: :destroy
# here. # here.
......
# frozen_string_literal: true
# A single Prometheus query plus the metadata needed to chart it (title,
# y-axis label, unit and legend).
#
# Records flagged `common` must not belong to a project; every other record
# must belong to one (see the two `validates :project` rules below).
class PrometheusMetric < ActiveRecord::Base
  belongs_to :project, validate: true, inverse_of: :prometheus_metrics

  enum group: {
    # built-in groups
    nginx_ingress: -1,
    ha_proxy: -2,
    aws_elb: -3,
    nginx: -4,
    kubernetes: -5,

    # custom/user groups
    business: 0,
    response: 1,
    system: 2
  }

  validates :title, presence: true
  validates :query, presence: true
  validates :group, presence: true
  validates :y_label, presence: true
  validates :unit, presence: true
  validates :project, presence: true, unless: :common?
  validates :project, absence: true, if: :common?

  scope :common, -> { where(common: true) }

  # Display title for every group key of the `group` enum.
  # NOTE(review): `_()` is called while this constant is being built, so the
  # titles are presumably fixed to the locale active at load time - confirm
  # that this is intended.
  GROUP_TITLES = {
    # built-in groups
    nginx_ingress: _('Response metrics (NGINX Ingress)'),
    ha_proxy: _('Response metrics (HA Proxy)'),
    aws_elb: _('Response metrics (AWS ELB)'),
    nginx: _('Response metrics (NGINX)'),
    kubernetes: _('System metrics (Kubernetes)'),

    # custom/user groups
    business: _('Business metrics (Custom)'),
    response: _('Response metrics (Custom)'),
    system: _('System metrics (Custom)')
  }.freeze

  # Prometheus series names listed per built-in group. Custom groups have no
  # entry here.
  REQUIRED_METRICS = {
    nginx_ingress: %w(nginx_upstream_responses_total nginx_upstream_response_msecs_avg),
    ha_proxy: %w(haproxy_frontend_http_requests_total haproxy_frontend_http_responses_total),
    aws_elb: %w(aws_elb_request_count_sum aws_elb_latency_average aws_elb_httpcode_backend_5_xx_sum),
    nginx: %w(nginx_server_requests nginx_server_requestMsec),
    kubernetes: %w(container_memory_usage_bytes container_cpu_usage_seconds_total)
  }.freeze

  # Returns the display title of this metric's group.
  #
  # Returns nil instead of raising when no group is set yet (for example on
  # a new record that has not passed validation).
  def group_title
    GROUP_TITLES[group&.to_sym]
  end

  # Returns the series names from REQUIRED_METRICS for this metric's group as
  # an array of strings. The array is empty for groups without an entry and
  # when no group is set.
  def required_metrics
    REQUIRED_METRICS[group&.to_sym].to_a.map(&:to_s)
  end

  # Builds a Gitlab::Prometheus::Metric from this record. The weight is
  # always 0.
  def to_query_metric
    Gitlab::Prometheus::Metric.new(id: id, title: title, required_metrics: required_metrics, weight: 0, y_label: y_label, queries: queries)
  end

  # Returns a one-element array describing this metric's query. Keys whose
  # value is nil (such as an absent legend or series) are dropped by
  # `compact`.
  def queries
    [
      {
        query_range: query,
        unit: unit,
        label: legend,
        series: query_series
      }.compact
    ]
  end

  # Returns the per-status-class colour mapping when the legend is exactly
  # 'Status Code'; returns nil for every other legend.
  def query_series
    case legend
    when 'Status Code'
      [{
        label: 'status_code',
        when: [
          { value: '2xx', color: 'green' },
          { value: '4xx', color: 'orange' },
          { value: '5xx', color: 'red' }
        ]
      }]
    end
  end
end
---
title: Import all common metrics into database
merge_request: 21459
author:
type: changed
...@@ -7,7 +7,8 @@ ...@@ -7,7 +7,8 @@
- nginx_upstream_responses_total - nginx_upstream_responses_total
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)' - id: response_metrics_nginx_ingress_throughput_status_code
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
unit: req / sec unit: req / sec
label: Status Code label: Status Code
series: series:
...@@ -25,7 +26,8 @@ ...@@ -25,7 +26,8 @@
- nginx_upstream_response_msecs_avg - nginx_upstream_response_msecs_avg
weight: 1 weight: 1
queries: queries:
- query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})' - id: response_metrics_nginx_ingress_latency_pod_average
query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})'
label: Pod average label: Pod average
unit: ms unit: ms
- title: "HTTP Error Rate" - title: "HTTP Error Rate"
...@@ -34,7 +36,8 @@ ...@@ -34,7 +36,8 @@
- nginx_upstream_responses_total - nginx_upstream_responses_total
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100' - id: response_metrics_nginx_ingress_http_error_rate
query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100'
label: 5xx Errors label: 5xx Errors
unit: "%" unit: "%"
- group: Response metrics (HA Proxy) - group: Response metrics (HA Proxy)
...@@ -46,10 +49,12 @@ ...@@ -46,10 +49,12 @@
- haproxy_frontend_http_requests_total - haproxy_frontend_http_requests_total
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)' - id: response_metrics_ha_proxy_throughput_status_code
query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
unit: req / sec unit: req / sec
label: Status Code
series: series:
- label: code - label: status_code
when: when:
- value: 2xx - value: 2xx
color: green color: green
...@@ -63,7 +68,8 @@ ...@@ -63,7 +68,8 @@
- haproxy_frontend_http_responses_total - haproxy_frontend_http_responses_total
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))' - id: response_metrics_ha_proxy_http_error_rate
query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))'
label: HTTP Errors label: HTTP Errors
unit: "%" unit: "%"
- group: Response metrics (AWS ELB) - group: Response metrics (AWS ELB)
...@@ -75,7 +81,8 @@ ...@@ -75,7 +81,8 @@
- aws_elb_request_count_sum - aws_elb_request_count_sum
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60' - id: response_metrics_aws_elb_throughput_requests
query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60'
label: Total label: Total
unit: req / sec unit: req / sec
- title: "Latency" - title: "Latency"
...@@ -84,7 +91,8 @@ ...@@ -84,7 +91,8 @@
- aws_elb_latency_average - aws_elb_latency_average
weight: 1 weight: 1
queries: queries:
- query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000' - id: response_metrics_aws_elb_latency_average
query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000'
label: Average label: Average
unit: ms unit: ms
- title: "HTTP Error Rate" - title: "HTTP Error Rate"
...@@ -94,7 +102,8 @@ ...@@ -94,7 +102,8 @@
- aws_elb_httpcode_backend_5_xx_sum - aws_elb_httpcode_backend_5_xx_sum
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})' - id: response_metrics_aws_elb_http_error_rate
query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})'
label: HTTP Errors label: HTTP Errors
unit: "%" unit: "%"
- group: Response metrics (NGINX) - group: Response metrics (NGINX)
...@@ -106,7 +115,8 @@ ...@@ -106,7 +115,8 @@
- nginx_server_requests - nginx_server_requests
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)' - id: response_metrics_nginx_throughput_status_code
query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)'
unit: req / sec unit: req / sec
label: Status Code label: Status Code
series: series:
...@@ -124,7 +134,8 @@ ...@@ -124,7 +134,8 @@
- nginx_server_requestMsec - nginx_server_requestMsec
weight: 1 weight: 1
queries: queries:
- query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})' - id: response_metrics_nginx_latency
query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})'
label: Upstream label: Upstream
unit: ms unit: ms
- title: "HTTP Error Rate" - title: "HTTP Error Rate"
...@@ -133,7 +144,8 @@ ...@@ -133,7 +144,8 @@
- nginx_server_requests - nginx_server_requests
weight: 1 weight: 1
queries: queries:
- query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))' - id: response_metrics_nginx_http_error_rate
query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
label: HTTP Errors label: HTTP Errors
unit: "errors / sec" unit: "errors / sec"
- group: System metrics (Kubernetes) - group: System metrics (Kubernetes)
...@@ -145,7 +157,8 @@ ...@@ -145,7 +157,8 @@
- container_memory_usage_bytes - container_memory_usage_bytes
weight: 4 weight: 4
queries: queries:
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024' - id: system_metrics_kubernetes_container_memory_total
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024'
label: Total label: Total
unit: GB unit: GB
- title: "Core Usage (Total)" - title: "Core Usage (Total)"
...@@ -154,7 +167,8 @@ ...@@ -154,7 +167,8 @@
- container_cpu_usage_seconds_total - container_cpu_usage_seconds_total
weight: 3 weight: 3
queries: queries:
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)' - id: system_metrics_kubernetes_container_cores_total
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
label: Total label: Total
unit: "cores" unit: "cores"
- title: "Memory Usage (Pod average)" - title: "Memory Usage (Pod average)"
...@@ -163,15 +177,39 @@ ...@@ -163,15 +177,39 @@
- container_memory_usage_bytes - container_memory_usage_bytes
weight: 2 weight: 2
queries: queries:
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024' - id: system_metrics_kubernetes_container_memory_average
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
label: Pod average
unit: MB
- title: "Canary: Memory Usage (Pod Average)"
y_label: "Memory Used per Pod"
required_metrics:
- container_memory_usage_bytes
weight: 2
queries:
- id: system_metrics_kubernetes_container_memory_average_canary
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
label: Pod average label: Pod average
unit: MB unit: MB
- title: "Core Usage (Pod average)" track: canary
- title: "Core Usage (Pod Average)"
y_label: "Cores per Pod"
required_metrics:
- container_cpu_usage_seconds_total
weight: 1
queries:
- id: system_metrics_kubernetes_container_core_usage
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
label: Pod average
unit: "cores"
- title: "Canary: Core Usage (Pod Average)"
y_label: "Cores per Pod" y_label: "Cores per Pod"
required_metrics: required_metrics:
- container_cpu_usage_seconds_total - container_cpu_usage_seconds_total
weight: 1 weight: 1
queries: queries:
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))' - id: system_metrics_kubernetes_container_core_usage_canary
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
label: Pod average label: Pod average
unit: "cores" unit: "cores"
track: canary
# frozen_string_literal: true
require Rails.root.join('db/importers/common_metrics_importer.rb')
# Run the common metrics importer with its default arguments whenever this
# file is loaded.
::Importers::CommonMetricsImporter.new.execute
# frozen_string_literal: true
require Rails.root.join('db/importers/common_metrics_importer.rb')
# Run the common metrics importer with its default arguments whenever this
# file is loaded.
::Importers::CommonMetricsImporter.new.execute
# frozen_string_literal: true
module Importers
# ActiveRecord model declared inside the Importers namespace and used by
# CommonMetricsImporter. It declares only the `group` enum, the `common`
# scope and GROUP_TITLES - no validations or associations.
# NOTE(review): presumably kept separate from ::PrometheusMetric so the
# importer does not depend on the application model - confirm.
class PrometheusMetric < ActiveRecord::Base
enum group: {
# built-in groups
nginx_ingress: -1,
ha_proxy: -2,
aws_elb: -3,
nginx: -4,
kubernetes: -5,
# custom groups
business: 0,
response: 1,
system: 2
}
# Rows flagged as common metrics.
scope :common, -> { where(common: true) }
# Display title per group key; CommonMetricsImporter inverts this hash to
# map a title read from the YAML file back to its group key.
GROUP_TITLES = {
business: _('Business metrics (Custom)'),
response: _('Response metrics (Custom)'),
system: _('System metrics (Custom)'),
nginx_ingress: _('Response metrics (NGINX Ingress)'),
ha_proxy: _('Response metrics (HA Proxy)'),
aws_elb: _('Response metrics (AWS ELB)'),
nginx: _('Response metrics (NGINX)'),
kubernetes: _('System metrics (Kubernetes)')
}.freeze
end
# Imports the Prometheus metrics described in a YAML file into the
# prometheus_metrics table, creating or updating one common metric row per
# query entry (matched by the query's `id`).
class CommonMetricsImporter
  # Raised when a query entry has no `id`.
  MissingQueryId = Class.new(StandardError)

  # Parsed YAML content: an array of group hashes with string keys.
  attr_reader :content

  # @param file [String, Pathname] YAML file to import. A relative path is
  #   resolved against Rails.root, so the import does not depend on the
  #   working directory of the calling process; an absolute path is used as
  #   given.
  def initialize(file = 'config/prometheus/common_metrics.yml')
    @content = YAML.load_file(Rails.root.join(file).to_s)
  end

  # Creates or updates one metric for every query found in `content`.
  #
  # @raise [MissingQueryId] when a query entry has no `id`
  def execute
    process_content do |id, attributes|
      find_or_build_metric!(id)
        .update!(**attributes)
    end
  end

  private

  # Walks every group in the parsed file, yielding (id, attributes) per query.
  def process_content(&blk)
    content.map do |group|
      process_group(group, &blk)
    end
  end

  # Resolves the group's title to its enum value and descends into its
  # metrics.
  def process_group(group, &blk)
    attributes = {
      group: find_group_title_key(group['group'])
    }

    group['metrics'].map do |metric|
      process_metric(metric, attributes, &blk)
    end
  end

  # Adds the metric-level attributes and descends into the metric's queries.
  def process_metric(metric, attributes, &blk)
    attributes = attributes.merge(
      title: metric['title'],
      y_label: metric['y_label'])

    metric['queries'].map do |query|
      process_metric_query(query, attributes, &blk)
    end
  end

  # Adds the query-level attributes and yields the query id together with the
  # complete attribute hash.
  def process_metric_query(query, attributes, &blk)
    attributes = attributes.merge(
      legend: query['label'],
      query: query['query_range'],
      unit: query['unit'])

    yield(query['id'], attributes)
  end

  # Returns the existing common metric with the given identifier, or a new
  # unsaved common metric carrying that identifier.
  def find_or_build_metric!(id)
    raise MissingQueryId unless id

    PrometheusMetric.common.find_by(identifier: id) ||
      PrometheusMetric.new(common: true, identifier: id)
  end

  # Maps a group title to the value stored for it in the `group` enum
  # (nil when the title is unknown).
  def find_group_title_key(title)
    PrometheusMetric.groups[find_group_title(title)]
  end

  # Maps a group title to its GROUP_TITLES key (nil when the title is
  # unknown).
  def find_group_title(title)
    PrometheusMetric::GROUP_TITLES.invert[title]
  end
end
end
# frozen_string_literal: true
# Creates the prometheus_metrics table.
class CreatePrometheusMetrics < ActiveRecord::Migration
DOWNTIME = false
def change
create_table :prometheus_metrics do |t|
# Required, indexed project reference; the foreign key cascades on delete.
t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false
t.string :title, null: false
t.string :query, null: false
# Optional presentation fields.
t.string :y_label
t.string :unit
t.string :legend
# Integer-backed group, required and indexed.
t.integer :group, null: false, index: true
t.timestamps_with_timezone null: false
end
end
end
# frozen_string_literal: true
# Makes prometheus_metrics.project_id nullable.
class ChangeProjectIdForPrometheusMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
# Third argument `true` allows NULL values in the column.
change_column_null :prometheus_metrics, :project_id, true
end
end
# frozen_string_literal: true
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
require Rails.root.join('db/migrate/prometheus_metrics_limits_to_mysql')
# Runs PrometheusMetricsLimitsToMysql#up as a regular migration step.
# Rolling back does nothing.
class FixPrometheusMetricQueryLimits < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
PrometheusMetricsLimitsToMysql.new.up
end
def down
# no-op
end
end
# frozen_string_literal: true
# Adds a boolean `common` column (default false) to prometheus_metrics using
# the add_column_with_default helper. The migration runs with the DDL
# transaction disabled.
class AddCommonToPrometheusMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default(:prometheus_metrics, :common, :boolean, default: false)
end
def down
remove_column(:prometheus_metrics, :common)
end
end
# frozen_string_literal: true
# Adds an index on prometheus_metrics.common using the concurrent index
# helpers. The migration runs with the DDL transaction disabled.
class AddIndexOnCommonForPrometheusMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :prometheus_metrics, :common
end
def down
remove_concurrent_index :prometheus_metrics, :common
end
end
# frozen_string_literal: true
# Adds a nullable string `identifier` column to prometheus_metrics.
class AddIdentifierToPrometheusMetric < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
add_column :prometheus_metrics, :identifier, :string
end
end
# frozen_string_literal: true
# Adds a unique index on prometheus_metrics.identifier using the concurrent
# index helpers. The migration runs with the DDL transaction disabled.
class AddIndexForIdentifierToPrometheusMetric < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :prometheus_metrics, :identifier, unique: true
end
def down
remove_concurrent_index :prometheus_metrics, :identifier, unique: true
end
end
# frozen_string_literal: true
# Data migration that runs the common metrics importer with its default
# arguments. Rolling back does nothing.
class ImportCommonMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Load the importer explicitly from db/importers.
require Rails.root.join('db/importers/common_metrics_importer.rb')
DOWNTIME = false
def up
Importers::CommonMetricsImporter.new.execute
end
def down
# no-op
end
end
# On MySQL, changes prometheus_metrics.query to a text column with a limit of
# 4096 and a nil default. Does nothing on other databases.
class PrometheusMetricsLimitsToMysql < ActiveRecord::Migration
DOWNTIME = false
def up
# Only MySQL is altered; every other adapter returns early.
return unless Gitlab::Database.mysql?
change_column :prometheus_metrics, :query, :text, limit: 4096, default: nil
end
# Intentionally empty: the column change is not reverted.
def down
end
end
...@@ -1700,6 +1700,25 @@ ActiveRecord::Schema.define(version: 20180901171833) do ...@@ -1700,6 +1700,25 @@ ActiveRecord::Schema.define(version: 20180901171833) do
add_index "projects", ["star_count"], name: "index_projects_on_star_count", using: :btree add_index "projects", ["star_count"], name: "index_projects_on_star_count", using: :btree
add_index "projects", ["visibility_level"], name: "index_projects_on_visibility_level", using: :btree add_index "projects", ["visibility_level"], name: "index_projects_on_visibility_level", using: :btree
create_table "prometheus_metrics", force: :cascade do |t|
t.integer "project_id"
t.string "title", null: false
t.string "query", null: false
t.string "y_label"
t.string "unit"
t.string "legend"
t.integer "group", null: false
t.datetime_with_timezone "created_at", null: false
t.datetime_with_timezone "updated_at", null: false
t.boolean "common", default: false, null: false
t.string "identifier"
end
add_index "prometheus_metrics", ["common"], name: "index_prometheus_metrics_on_common", using: :btree
add_index "prometheus_metrics", ["group"], name: "index_prometheus_metrics_on_group", using: :btree
add_index "prometheus_metrics", ["identifier"], name: "index_prometheus_metrics_on_identifier", unique: true, using: :btree
add_index "prometheus_metrics", ["project_id"], name: "index_prometheus_metrics_on_project_id", using: :btree
create_table "protected_branch_merge_access_levels", force: :cascade do |t| create_table "protected_branch_merge_access_levels", force: :cascade do |t|
t.integer "protected_branch_id", null: false t.integer "protected_branch_id", null: false
t.integer "access_level", default: 40, null: false t.integer "access_level", default: 40, null: false
...@@ -2380,6 +2399,7 @@ ActiveRecord::Schema.define(version: 20180901171833) do ...@@ -2380,6 +2399,7 @@ ActiveRecord::Schema.define(version: 20180901171833) do
add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade
add_foreign_key "project_mirror_data", "projects", on_delete: :cascade add_foreign_key "project_mirror_data", "projects", on_delete: :cascade
add_foreign_key "project_statistics", "projects", on_delete: :cascade add_foreign_key "project_statistics", "projects", on_delete: :cascade
add_foreign_key "prometheus_metrics", "projects", on_delete: :cascade
add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade
add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade
add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade
......
...@@ -47,6 +47,7 @@ description: 'Learn how to contribute to GitLab.' ...@@ -47,6 +47,7 @@ description: 'Learn how to contribute to GitLab.'
- [How to dump production data to staging](db_dump.md) - [How to dump production data to staging](db_dump.md)
- [Working with the GitHub importer](github_importer.md) - [Working with the GitHub importer](github_importer.md)
- [Working with Merge Request diffs](diffs.md) - [Working with Merge Request diffs](diffs.md)
- [Prometheus metrics](prometheus_metrics.md)
## Performance guides ## Performance guides
......
# Working with Prometheus Metrics
## Adding to the library
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `common_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/common_metrics.yml) file.
### Query identifier
When adding a new metric, each query must have a unique identifier, which is used to update the metric later when it changes:
```yaml
- group: Response metrics (NGINX Ingress)
metrics:
- title: "Throughput"
y_label: "Requests / Sec"
queries:
- id: response_metrics_nginx_ingress_throughput_status_code
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
unit: req / sec
label: Status Code
```
### Update existing metrics
After you add a new _common_ metric or change an existing one, you must create a new database migration that will query and update all existing metrics.
NOTE: **Note:**
If a query metric (which is identified by `id:`) is removed, it will not be removed from the database by default.
You might want to add an additional database migration that decides what to do with the removed metric.
For example, you might want to migrate all dependent data to a different metric.
```ruby
class ImportCommonMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
require Rails.root.join('db/importers/common_metrics_importer.rb')
DOWNTIME = false
def up
Importers::CommonMetricsImporter.new.execute
end
def down
# no-op
end
end
```
...@@ -17,9 +17,3 @@ GitLab retrieves performance data from the configured Prometheus server, and att ...@@ -17,9 +17,3 @@ GitLab retrieves performance data from the configured Prometheus server, and att
In order to isolate and only display relevant metrics for a given environment, GitLab needs a method to detect which labels are associated. To do that, In order to isolate and only display relevant metrics for a given environment, GitLab needs a method to detect which labels are associated. To do that,
GitLab uses the defined queries and fills in the environment specific variables. Typically this involves looking for the [$CI_ENVIRONMENT_SLUG](../../../../ci/variables/README.md#predefined-variables-environment-variables), but may also include other information such as the project's Kubernetes namespace. Each search query is defined in the [exporter specific documentation](#prometheus-metrics-library). GitLab uses the defined queries and fills in the environment specific variables. Typically this involves looking for the [$CI_ENVIRONMENT_SLUG](../../../../ci/variables/README.md#predefined-variables-environment-variables), but may also include other information such as the project's Kubernetes namespace. Each search query is defined in the [exporter specific documentation](#prometheus-metrics-library).
## Adding to the library
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `additional_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/additional_metrics.yml) file.
> Note: The library is only for monitoring public, common, system services which all customers can benefit from. Support for monitoring [customer proprietary metrics](https://gitlab.com/gitlab-org/gitlab-ee/issues/2273) will be added in a subsequent release.
...@@ -64,6 +64,7 @@ project_tree: ...@@ -64,6 +64,7 @@ project_tree:
- :create_access_levels - :create_access_levels
- :project_feature - :project_feature
- :custom_attributes - :custom_attributes
- :prometheus_metrics
- :project_badges - :project_badges
- :ci_cd_settings - :ci_cd_settings
...@@ -108,6 +109,9 @@ excluded_attributes: ...@@ -108,6 +109,9 @@ excluded_attributes:
- :remote_mirror_available_overridden - :remote_mirror_available_overridden
- :description_html - :description_html
- :repository_languages - :repository_languages
prometheus_metrics:
- :common
- :identifier
snippets: snippets:
- :expired_at - :expired_at
merge_request_diff: merge_request_diff:
......
...@@ -5,7 +5,7 @@ module Gitlab ...@@ -5,7 +5,7 @@ module Gitlab
MUTEX = Mutex.new MUTEX = Mutex.new
extend self extend self
def load_groups_from_yaml(file_name = 'additional_metrics.yml') def load_groups_from_yaml(file_name)
yaml_metrics_raw(file_name).map(&method(:group_from_entry)) yaml_metrics_raw(file_name).map(&method(:group_from_entry))
end end
......
...@@ -4,10 +4,13 @@ module Gitlab ...@@ -4,10 +4,13 @@ module Gitlab
include ActiveModel::Model include ActiveModel::Model
attr_accessor :name, :priority, :metrics attr_accessor :name, :priority, :metrics
validates :name, :priority, :metrics, presence: true validates :name, :priority, :metrics, presence: true
def self.common_metrics def self.common_metrics
AdditionalMetricsParser.load_groups_from_yaml ::PrometheusMetric.common.group_by(&:group_title).map do |name, metrics|
MetricGroup.new(name: name, priority: 0, metrics: metrics.map(&:to_query_metric))
end
end end
# EE only # EE only
......
...@@ -3,6 +3,7 @@ require Rails.root.join('db/migrate/markdown_cache_limits_to_mysql') ...@@ -3,6 +3,7 @@ require Rails.root.join('db/migrate/markdown_cache_limits_to_mysql')
require Rails.root.join('db/migrate/merge_request_diff_file_limits_to_mysql') require Rails.root.join('db/migrate/merge_request_diff_file_limits_to_mysql')
require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql') require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql')
require Rails.root.join('db/migrate/gpg_keys_limits_to_mysql') require Rails.root.join('db/migrate/gpg_keys_limits_to_mysql')
require Rails.root.join('db/migrate/prometheus_metrics_limits_to_mysql')
desc "GitLab | Add limits to strings in mysql database" desc "GitLab | Add limits to strings in mysql database"
task add_limits_mysql: :environment do task add_limits_mysql: :environment do
...@@ -12,4 +13,5 @@ task add_limits_mysql: :environment do ...@@ -12,4 +13,5 @@ task add_limits_mysql: :environment do
MergeRequestDiffFileLimitsToMysql.new.up MergeRequestDiffFileLimitsToMysql.new.up
LimitsCiBuildTraceChunksRawDataForMysql.new.up LimitsCiBuildTraceChunksRawDataForMysql.new.up
IncreaseMysqlTextLimitForGpgKeys.new.up IncreaseMysqlTextLimitForGpgKeys.new.up
PrometheusMetricsLimitsToMysql.new.up
end end
...@@ -1035,6 +1035,9 @@ msgstr "" ...@@ -1035,6 +1035,9 @@ msgstr ""
msgid "Browse files" msgid "Browse files"
msgstr "" msgstr ""
msgid "Business metrics (Custom)"
msgstr ""
msgid "ByAuthor|by" msgid "ByAuthor|by"
msgstr "" msgstr ""
...@@ -4917,6 +4920,21 @@ msgstr "" ...@@ -4917,6 +4920,21 @@ msgstr ""
msgid "Resolve discussion" msgid "Resolve discussion"
msgstr "" msgstr ""
msgid "Response metrics (AWS ELB)"
msgstr ""
msgid "Response metrics (Custom)"
msgstr ""
msgid "Response metrics (HA Proxy)"
msgstr ""
msgid "Response metrics (NGINX Ingress)"
msgstr ""
msgid "Response metrics (NGINX)"
msgstr ""
msgid "Resume" msgid "Resume"
msgstr "" msgstr ""
...@@ -5500,6 +5518,12 @@ msgstr "" ...@@ -5500,6 +5518,12 @@ msgstr ""
msgid "System Info" msgid "System Info"
msgstr "" msgstr ""
msgid "System metrics (Custom)"
msgstr ""
msgid "System metrics (Kubernetes)"
msgstr ""
msgid "Tag (%{tag_count})" msgid "Tag (%{tag_count})"
msgid_plural "Tags (%{tag_count})" msgid_plural "Tags (%{tag_count})"
msgstr[0] "" msgstr[0] ""
......
# frozen_string_literal: true
require 'spec_helper'
# Loads the development seed file and checks that common Prometheus metrics
# exist afterwards.
describe 'Import metrics on development seed' do
subject { load Rails.root.join('db', 'fixtures', 'development', '99_common_metrics.rb') }
it "imports all prometheus metrics" do
expect(PrometheusMetric.common).to be_empty
# Referencing `subject` is what actually loads (and thereby runs) the seed file.
subject
expect(PrometheusMetric.common).not_to be_empty
end
end
# frozen_string_literal: true
require 'rails_helper'
require Rails.root.join("db", "importers", "common_metrics_importer.rb")
# Guards against the importer's model drifting away from ::PrometheusMetric:
# both must declare the same group enum and the same GROUP_TITLES.
describe Importers::PrometheusMetric do
it 'group enum equals ::PrometheusMetric' do
expect(described_class.groups).to eq(::PrometheusMetric.groups)
end
it 'GROUP_TITLES equals ::PrometheusMetric' do
expect(described_class::GROUP_TITLES).to eq(::PrometheusMetric::GROUP_TITLES)
end
end
# Exercises the importer twice: once against the YAML file it loads by
# default, and once against a stubbed single-query payload to check that
# every field is stored and that existing rows are matched by identifier.
describe Importers::CommonMetricsImporter do
  subject { described_class.new }

  context "does import common_metrics.yml" do
    let(:groups) { subject.content }
    let(:metrics) { groups.flat_map { |group| group['metrics'] } }
    let(:queries) { metrics.flat_map { |metric| metric['queries'] } }
    let(:query_ids) { queries.map { |query| query['id'] } }

    before do
      subject.execute
    end

    it "has the same amount of groups" do
      expect(PrometheusMetric.common.group(:group).count.count).to eq(groups.count)
    end

    it "has the same amount of metrics" do
      expect(PrometheusMetric.common.group(:group, :title).count.count).to eq(metrics.count)
    end

    it "has the same amount of queries" do
      expect(PrometheusMetric.common.count).to eq(queries.count)
    end

    it "does not have duplicate IDs" do
      expect(query_ids).to eq(query_ids.uniq)
    end

    it "imports all IDs" do
      expect(PrometheusMetric.common.pluck(:identifier)).to contain_exactly(*query_ids)
    end
  end

  context 'does import properly all fields' do
    let(:query_identifier) { 'response-metric' }
    let(:group) do
      {
        group: 'Response metrics (NGINX Ingress)',
        metrics: [{
          title: "Throughput",
          y_label: "Requests / Sec",
          queries: [{
            id: query_identifier,
            query_range: 'my-query',
            unit: 'my-unit',
            label: 'status code'
          }]
        }]
      }
    end

    before do
      # Stub only: the hook replaces the parsed file content and is not
      # itself an assertion, so `allow` is used rather than `expect`.
      allow(subject).to receive(:content) { [group.deep_stringify_keys] }
    end

    shared_examples 'stores metric' do
      let(:metric) { PrometheusMetric.find_by(identifier: query_identifier) }

      it 'with all data' do
        expect(metric.group).to eq('nginx_ingress')
        expect(metric.title).to eq('Throughput')
        expect(metric.y_label).to eq('Requests / Sec')
        expect(metric.unit).to eq('my-unit')
        expect(metric.legend).to eq('status code')
        expect(metric.query).to eq('my-query')
      end
    end

    context 'if ID is missing' do
      let(:query_identifier) { nil }

      it 'raises exception' do
        expect { subject.execute }.to raise_error(described_class::MissingQueryId)
      end
    end

    context 'for existing common metric with different ID' do
      let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'my-existing-metric') }

      before do
        subject.execute
      end

      it_behaves_like 'stores metric' do
        it 'and existing metric is not changed' do
          expect(metric).not_to eq(existing_metric)
        end
      end
    end

    context 'when metric with ID exists ' do
      let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'response-metric') }

      before do
        subject.execute
      end

      it_behaves_like 'stores metric' do
        it 'and existing metric is changed' do
          expect(metric).to eq(existing_metric)
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Loads the production seed file and checks that common Prometheus metrics
# exist afterwards.
describe 'Import metrics on production seed' do
subject { load Rails.root.join('db', 'fixtures', 'production', '999_common_metrics.rb') }
it "imports all prometheus metrics" do
expect(PrometheusMetric.common).to be_empty
# Referencing `subject` is what actually loads (and thereby runs) the seed file.
subject
expect(PrometheusMetric.common).not_to be_empty
end
end
# frozen_string_literal: true
# Factory for PrometheusMetric. By default it builds a project-level metric in
# the :business group; the :common trait marks it common and clears the
# project.
FactoryBot.define do
factory :prometheus_metric, class: PrometheusMetric do
title 'title'
query 'avg(metric)'
y_label 'y_label'
unit 'm/s'
group :business
project
legend 'legend'
# Common variant: sets the `common` flag and removes the project association.
trait :common do
common true
project nil
end
end
end
...@@ -288,6 +288,7 @@ project: ...@@ -288,6 +288,7 @@ project:
- fork_network_member - fork_network_member
- fork_network - fork_network
- custom_attributes - custom_attributes
- prometheus_metrics
- lfs_file_locks - lfs_file_locks
- project_badges - project_badges
- source_of_merge_requests - source_of_merge_requests
...@@ -303,6 +304,8 @@ award_emoji: ...@@ -303,6 +304,8 @@ award_emoji:
- user - user
priorities: priorities:
- label - label
prometheus_metrics:
- project
timelogs: timelogs:
- issue - issue
- merge_request - merge_request
......
...@@ -555,6 +555,19 @@ ProjectCustomAttribute: ...@@ -555,6 +555,19 @@ ProjectCustomAttribute:
- project_id - project_id
- key - key
- value - value
PrometheusMetric:
- id
- created_at
- updated_at
- project_id
- y_label
- unit
- legend
- title
- query
- group
- common
- identifier
Badge: Badge:
- id - id
- link_url - link_url
......
...@@ -6,7 +6,7 @@ describe Gitlab::Prometheus::AdditionalMetricsParser do ...@@ -6,7 +6,7 @@ describe Gitlab::Prometheus::AdditionalMetricsParser do
let(:parser_error_class) { Gitlab::Prometheus::ParsingError } let(:parser_error_class) { Gitlab::Prometheus::ParsingError }
describe '#load_groups_from_yaml' do describe '#load_groups_from_yaml' do
subject { described_class.load_groups_from_yaml } subject { described_class.load_groups_from_yaml('dummy.yaml') }
describe 'parsing sample yaml' do describe 'parsing sample yaml' do
let(:sample_yaml) do let(:sample_yaml) do
......
# frozen_string_literal: true
require 'rails_helper'
# Spec for MetricGroup lookups: `.common_metrics` groups common metrics by
# their group title, and `.for_project` exposes only the common metric to a
# project that owns no metric of its own.
describe Gitlab::Prometheus::MetricGroup do
  describe '.common_metrics' do
    let!(:project_metric) { create(:prometheus_metric) }
    let!(:common_metric_group_a) { create(:prometheus_metric, :common, group: :aws_elb) }
    let!(:common_metric_group_b_q1) { create(:prometheus_metric, :common, group: :kubernetes) }
    let!(:common_metric_group_b_q2) { create(:prometheus_metric, :common, group: :kubernetes) }

    subject { described_class.common_metrics }

    it 'returns exactly two groups' do
      group_names = subject.map(&:name)

      expect(group_names).to contain_exactly(
        'Response metrics (AWS ELB)',
        'System metrics (Kubernetes)'
      )
    end

    it 'returns exactly three metric queries' do
      metric_ids = subject.flat_map(&:metrics).map(&:id)

      expect(metric_ids).to contain_exactly(
        common_metric_group_a.id,
        common_metric_group_b_q1.id,
        common_metric_group_b_q2.id
      )
    end
  end

  describe '.for_project' do
    let!(:other_project) { create(:project) }
    let!(:project_metric) { create(:prometheus_metric) }
    let!(:common_metric) { create(:prometheus_metric, :common, group: :aws_elb) }

    # Ids of every metric exposed to `other_project`, across all groups.
    subject do
      groups = described_class.for_project(other_project)
      groups.flat_map(&:metrics).map(&:id)
    end

    it 'returns exactly one common metric' do
      is_expected.to contain_exactly(common_metric.id)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'migrate', '20180831164910_import_common_metrics.rb')
# Migration spec: running the migration must take the set of common
# Prometheus metrics from empty to non-empty.
describe ImportCommonMetrics, :migration do
  describe '#up' do
    it "imports all prometheus metrics" do
      expect { migrate! }
        .to change { PrometheusMetric.common.empty? }.from(true).to(false)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Model spec for PrometheusMetric: association, presence validations, the
# rule tying `common` to the presence of a project, and the helpers
# `query_series`, `group_title` and `to_query_metric`.
describe PrometheusMetric do
  subject { build(:prometheus_metric) }

  let(:other_project) { build(:project) }

  it { is_expected.to belong_to(:project) }
  it { is_expected.to validate_presence_of(:title) }
  it { is_expected.to validate_presence_of(:query) }
  it { is_expected.to validate_presence_of(:group) }
  # The model also declares presence validations for y_label and unit;
  # cover them so removing either validation is caught here.
  it { is_expected.to validate_presence_of(:y_label) }
  it { is_expected.to validate_presence_of(:unit) }

  # A metric is valid only when exactly one of these holds: it is common and
  # has no project, or it is not common and has a project.
  describe 'common metrics' do
    using RSpec::Parameterized::TableSyntax

    where(:common, :project, :result) do
      false | other_project | true
      false | nil           | false
      true  | other_project | false
      true  | nil           | true
    end

    with_them do
      before do
        subject.common = common
        subject.project = project
      end

      it { expect(subject.valid?).to eq(result) }
    end
  end

  # `query_series` is expected to be an Array for the 'Status Code' legend
  # and nil for any other legend.
  describe '#query_series' do
    using RSpec::Parameterized::TableSyntax

    where(:legend, :type) do
      'Some other legend' | NilClass
      'Status Code'       | Array
    end

    with_them do
      before do
        subject.legend = legend
      end

      it { expect(subject.query_series).to be_a(type) }
    end
  end

  describe '#group_title' do
    # Builds a metric in the given group and compares its title.
    shared_examples 'group_title' do |group, title|
      subject { build(:prometheus_metric, group: group).group_title }

      it "returns text #{title} for group #{group}" do
        expect(subject).to eq(title)
      end
    end

    it_behaves_like 'group_title', :business, 'Business metrics (Custom)'
    it_behaves_like 'group_title', :response, 'Response metrics (Custom)'
    it_behaves_like 'group_title', :system, 'System metrics (Custom)'
  end

  describe '#to_query_metric' do
    it 'converts to queryable metric object' do
      expect(subject.to_query_metric).to be_instance_of(Gitlab::Prometheus::Metric)
    end

    it 'queryable metric object has title' do
      expect(subject.to_query_metric.title).to eq(subject.title)
    end

    it 'queryable metric object has y_label' do
      expect(subject.to_query_metric.y_label).to eq(subject.y_label)
    end

    it 'queryable metric has no required_metric' do
      expect(subject.to_query_metric.required_metrics).to eq([])
    end

    it 'queryable metric has weight 0' do
      expect(subject.to_query_metric.weight).to eq(0)
    end

    it 'queryable metrics has query description' do
      # The single query entry mirrors the record's query, unit and legend.
      queries = [
        {
          query_range: subject.query,
          unit: subject.unit,
          label: subject.legend
        }
      ]

      expect(subject.to_query_metric.queries).to eq(queries)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment