Commit 36359d99 authored by João Alexandre Cunha's avatar João Alexandre Cunha Committed by Imre Farkas

Fetch nodes data from cluster

- Fetches nodes status [CPU and Memory: allocatable and capacity]
- Fetches nodes metrics [CPU and Memory: usage]
  - For this we had to add a new k8s api
  - Metrics are only fetched if the cluster is provided by GCP,
    since other providers might not implement the metrics server
- Exposes node data to the FE via ClusterSerializer#represent_list
- Adds and refactors specs
parent d7c3d735
...@@ -206,10 +206,16 @@ module Clusters ...@@ -206,10 +206,16 @@ module Clusters
end end
end end
# Reads the :nodes entry out of the reactive cache payload.
# Returns whatever with_reactive_cache returns when it does not yield
# (the accompanying spec expects nil for an uncached cluster).
def nodes
  with_reactive_cache { |cached| cached[:nodes] }
end
def calculate_reactive_cache def calculate_reactive_cache
return unless enabled? return unless enabled?
{ connection_status: retrieve_connection_status } { connection_status: retrieve_connection_status, nodes: retrieve_nodes }
end end
def persisted_applications def persisted_applications
...@@ -348,30 +354,53 @@ module Clusters ...@@ -348,30 +354,53 @@ module Clusters
end end
def retrieve_connection_status def retrieve_connection_status
kubeclient.core_client.discover result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.core_client.discover }
rescue *Gitlab::Kubernetes::Errors::CONNECTION result[:status]
:unreachable
rescue *Gitlab::Kubernetes::Errors::AUTHENTICATION
:authentication_failure
rescue Kubeclient::HttpError => e
kubeclient_error_status(e.message)
rescue => e
Gitlab::ErrorTracking.track_exception(e, cluster_id: id)
:unknown_failure
else
:connected
end end
# KubeClient uses the same error class def retrieve_nodes
# For connection errors (eg. timeout) and result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.get_nodes }
# for Kubernetes errors. cluster_nodes = result[:response].to_a
def kubeclient_error_status(message)
if message&.match?(/timed out|timeout/i) result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.metrics_client.get_nodes }
:unreachable nodes_metrics = result[:response].to_a
else
:authentication_failure cluster_nodes.inject([]) do |memo, node|
sliced_node = filter_relevant_node_attributes(node)
matched_node_metric = nodes_metrics.find { |node_metric| node_metric.metadata.name == node.metadata.name }
sliced_node_metrics = matched_node_metric ? filter_relevant_node_metrics_attributes(matched_node_metric) : {}
memo << sliced_node.merge(sliced_node_metrics)
end
end
def filter_relevant_node_attributes(node)
{
'metadata' => {
'name' => node.metadata.name
},
'status' => {
'capacity' => {
'cpu' => node.status.capacity.cpu,
'memory' => node.status.capacity.memory
},
'allocatable' => {
'cpu' => node.status.allocatable.cpu,
'memory' => node.status.allocatable.memory
}
}
}
end end
def filter_relevant_node_metrics_attributes(node_metrics)
{
'usage' => {
'cpu' => node_metrics.usage.cpu,
'memory' => node_metrics.usage.memory
}
}
end end
# To keep backward compatibility with AUTO_DEVOPS_DOMAIN # To keep backward compatibility with AUTO_DEVOPS_DOMAIN
......
...@@ -7,12 +7,12 @@ class ClusterEntity < Grape::Entity ...@@ -7,12 +7,12 @@ class ClusterEntity < Grape::Entity
expose :enabled expose :enabled
expose :environment_scope expose :environment_scope
expose :name expose :name
expose :nodes
expose :status_name, as: :status expose :status_name, as: :status
expose :status_reason expose :status_reason
expose :applications, using: ClusterApplicationEntity
expose :path do |cluster| expose :path do |cluster|
Clusters::ClusterPresenter.new(cluster).show_path # rubocop: disable CodeReuse/Presenter Clusters::ClusterPresenter.new(cluster).show_path # rubocop: disable CodeReuse/Presenter
end end
expose :applications, using: ClusterApplicationEntity
end end
...@@ -11,6 +11,7 @@ class ClusterSerializer < BaseSerializer ...@@ -11,6 +11,7 @@ class ClusterSerializer < BaseSerializer
:enabled, :enabled,
:environment_scope, :environment_scope,
:name, :name,
:nodes,
:path, :path,
:status :status
] ]
......
...@@ -20,6 +20,7 @@ module Gitlab ...@@ -20,6 +20,7 @@ module Gitlab
extensions: { group: 'apis/extensions', version: 'v1beta1' }, extensions: { group: 'apis/extensions', version: 'v1beta1' },
istio: { group: 'apis/networking.istio.io', version: 'v1alpha3' }, istio: { group: 'apis/networking.istio.io', version: 'v1alpha3' },
knative: { group: 'apis/serving.knative.dev', version: 'v1alpha1' }, knative: { group: 'apis/serving.knative.dev', version: 'v1alpha1' },
metrics: { group: 'apis/metrics.k8s.io', version: 'v1beta1' },
networking: { group: 'apis/networking.k8s.io', version: 'v1' } networking: { group: 'apis/networking.k8s.io', version: 'v1' }
}.freeze }.freeze
...@@ -34,7 +35,8 @@ module Gitlab ...@@ -34,7 +35,8 @@ module Gitlab
end end
# Core API methods delegates to the core api group client # Core API methods delegates to the core api group client
delegate :get_pods, delegate :get_nodes,
:get_pods,
:get_secrets, :get_secrets,
:get_config_map, :get_config_map,
:get_namespace, :get_namespace,
...@@ -102,6 +104,31 @@ module Gitlab ...@@ -102,6 +104,31 @@ module Gitlab
} }
}.freeze }.freeze
# Runs the given block and reports its outcome as a status hash instead of
# letting errors propagate to the caller.
#
# Success:  { status: :connected, response: <value of the block> }
# Failure:  { status: <symbol> } with no :response key, where the symbol is
#           :unreachable, :authentication_failure or :unknown_failure.
#
# Errors that match none of the known groups are reported to
# Gitlab::ErrorTracking together with the given cluster_id.
def self.graceful_request(cluster_id)
  response = yield

  { status: :connected, response: response }
rescue *Gitlab::Kubernetes::Errors::CONNECTION
  { status: :unreachable }
rescue *Gitlab::Kubernetes::Errors::AUTHENTICATION
  { status: :authentication_failure }
rescue Kubeclient::HttpError => http_error
  # The same error class covers timeouts and API errors; the message decides.
  { status: kubeclient_error_status(http_error.message) }
rescue => error
  Gitlab::ErrorTracking.track_exception(error, cluster_id: cluster_id)
  { status: :unknown_failure }
end
# KubeClient uses the same error class
# For connection errors (eg. timeout) and
# for Kubernetes errors.
# @param message [String, nil] message carried by the Kubeclient error
# @return [Symbol] :unreachable when the message contains "timed out" or
#   "timeout" (case-insensitive); :authentication_failure otherwise,
#   including when message is nil
def self.kubeclient_error_status(message)
  return :unreachable if message&.match?(/timed out|timeout/i)

  :authentication_failure
end
# We disable redirects through 'http_max_redirects: 0', # We disable redirects through 'http_max_redirects: 0',
# so that KubeClient does not follow redirects and # so that KubeClient does not follow redirects and
# expose internal services. # expose internal services.
......
...@@ -64,6 +64,45 @@ describe Gitlab::Kubernetes::KubeClient do ...@@ -64,6 +64,45 @@ describe Gitlab::Kubernetes::KubeClient do
end end
end end
describe '.graceful_request' do
  # The request under test: a block that calls a stubbed method on the client.
  # Evaluated lazily, so each context's `before` stubbing is in place first.
  subject(:result) { described_class.graceful_request(1) { client.foo } }

  context 'successful' do
    before do
      allow(client).to receive(:foo).and_return(true)
    end

    it 'returns connected status and foo response' do
      expect(result).to eq({ status: :connected, response: true })
    end
  end

  context 'errored' do
    using RSpec::Parameterized::TableSyntax

    # Each row: the error raised inside the block and the status it maps to.
    where(:error, :error_status) do
      SocketError                                      | :unreachable
      OpenSSL::X509::CertificateError                  | :authentication_failure
      StandardError                                    | :unknown_failure
      Kubeclient::HttpError.new(408, "timed out", nil) | :unreachable
      Kubeclient::HttpError.new(408, "timeout", nil)   | :unreachable
      Kubeclient::HttpError.new(408, "", nil)          | :authentication_failure
    end

    with_them do
      before do
        allow(client).to receive(:foo).and_raise(error)
      end

      it 'returns error status' do
        expect(result).to eq({ status: error_status })
      end
    end
  end
end
describe '#initialize' do describe '#initialize' do
shared_examples 'local address' do shared_examples 'local address' do
it 'blocks local addresses' do it 'blocks local addresses' do
...@@ -188,10 +227,25 @@ describe Gitlab::Kubernetes::KubeClient do ...@@ -188,10 +227,25 @@ describe Gitlab::Kubernetes::KubeClient do
end end
end end
# Checks that the metrics client targets the metrics.k8s.io API group at
# version v1beta1.
describe '#metrics_client' do
  subject { client.metrics_client }

  it_behaves_like 'a Kubeclient'

  it 'has the metrics API group endpoint' do
    # Dots are escaped so only the literal group name matches; an unescaped
    # "." would accept any character in those positions.
    expect(subject.api_endpoint.to_s).to match(%r{/apis/metrics\.k8s\.io\Z})
  end

  it 'has the api_version' do
    expect(subject.instance_variable_get(:@api_version)).to eq('v1beta1')
  end
end
describe 'core API' do describe 'core API' do
let(:core_client) { client.core_client } let(:core_client) { client.core_client }
[ [
:get_nodes,
:get_pods, :get_pods,
:get_secrets, :get_secrets,
:get_config_map, :get_config_map,
......
...@@ -948,6 +948,22 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -948,6 +948,22 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end end
end end
describe '#nodes' do
  subject { cluster.nodes }

  let(:cluster) { create(:cluster) }

  # Nothing has been stubbed into the reactive cache at this point.
  it { is_expected.to be_nil }

  context 'with a cached status' do
    before { stub_reactive_cache(cluster, nodes: [kube_node]) }

    it { is_expected.to eq([kube_node]) }
  end
end
describe '#calculate_reactive_cache' do describe '#calculate_reactive_cache' do
subject { cluster.calculate_reactive_cache } subject { cluster.calculate_reactive_cache }
...@@ -956,6 +972,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -956,6 +972,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
it 'does not populate the cache' do it 'does not populate the cache' do
expect(cluster).not_to receive(:retrieve_connection_status) expect(cluster).not_to receive(:retrieve_connection_status)
expect(cluster).not_to receive(:retrieve_nodes)
is_expected.to be_nil is_expected.to be_nil
end end
...@@ -964,12 +981,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -964,12 +981,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
context 'cluster is enabled' do context 'cluster is enabled' do
let(:cluster) { create(:cluster, :provided_by_user, :group) } let(:cluster) { create(:cluster, :provided_by_user, :group) }
context 'connection to the cluster is successful' do
before do before do
stub_kubeclient_discover(cluster.platform.api_url) stub_kubeclient_nodes_and_nodes_metrics(cluster.platform.api_url)
end end
it { is_expected.to eq(connection_status: :connected) } context 'connection to the cluster is successful' do
it { is_expected.to eq(connection_status: :connected, nodes: [kube_node.merge(kube_node_metrics)]) }
end end
context 'cluster cannot be reached' do context 'cluster cannot be reached' do
...@@ -978,7 +995,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -978,7 +995,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(SocketError) .and_raise(SocketError)
end end
it { is_expected.to eq(connection_status: :unreachable) } it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end end
context 'cluster cannot be authenticated to' do context 'cluster cannot be authenticated to' do
...@@ -987,7 +1004,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -987,7 +1004,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(OpenSSL::X509::CertificateError.new("Certificate error")) .and_raise(OpenSSL::X509::CertificateError.new("Certificate error"))
end end
it { is_expected.to eq(connection_status: :authentication_failure) } it { is_expected.to eq(connection_status: :authentication_failure, nodes: []) }
end end
describe 'Kubeclient::HttpError' do describe 'Kubeclient::HttpError' do
...@@ -999,18 +1016,18 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -999,18 +1016,18 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(Kubeclient::HttpError.new(error_code, error_message, nil)) .and_raise(Kubeclient::HttpError.new(error_code, error_message, nil))
end end
it { is_expected.to eq(connection_status: :authentication_failure) } it { is_expected.to eq(connection_status: :authentication_failure, nodes: []) }
context 'generic timeout' do context 'generic timeout' do
let(:error_message) { 'Timed out connecting to server'} let(:error_message) { 'Timed out connecting to server'}
it { is_expected.to eq(connection_status: :unreachable) } it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end end
context 'gateway timeout' do context 'gateway timeout' do
let(:error_message) { '504 Gateway Timeout for GET https://kubernetes.example.com/api/v1'} let(:error_message) { '504 Gateway Timeout for GET https://kubernetes.example.com/api/v1'}
it { is_expected.to eq(connection_status: :unreachable) } it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end end
end end
...@@ -1020,11 +1037,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do ...@@ -1020,11 +1037,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(StandardError) .and_raise(StandardError)
end end
it { is_expected.to eq(connection_status: :unknown_failure) } it { is_expected.to eq(connection_status: :unknown_failure, nodes: []) }
it 'notifies Sentry' do it 'notifies Sentry' do
expect(Gitlab::ErrorTracking).to receive(:track_exception) expect(Gitlab::ErrorTracking).to receive(:track_exception)
.with(instance_of(StandardError), hash_including(cluster_id: cluster.id)) .with(instance_of(StandardError), hash_including(cluster_id: cluster.id))
.twice
subject subject
end end
......
...@@ -3,23 +3,41 @@ ...@@ -3,23 +3,41 @@
require 'spec_helper' require 'spec_helper'
describe ClusterSerializer do describe ClusterSerializer do
let(:cluster) { create(:cluster, :project, provider_type: :user) }
describe '#represent_list' do
  subject { described_class.new.represent_list(cluster).keys }

  it 'serializes attrs correctly' do
    # The exact set of top-level keys the list representation must expose.
    expected_keys = %i[cluster_type enabled environment_scope name nodes path status]

    is_expected.to contain_exactly(*expected_keys)
  end
end
describe '#represent_status' do describe '#represent_status' do
subject { described_class.new.represent_status(cluster) } subject { described_class.new.represent_status(cluster).keys }
context 'when provider type is gcp and cluster is errored' do
let(:cluster) do
errored_provider = create(:cluster_provider_gcp, :errored)
context 'when provider type is gcp' do create(:cluster, provider_type: :gcp, provider_gcp: errored_provider)
let(:cluster) { create(:cluster, provider_type: :gcp, provider_gcp: provider) } end
let(:provider) { create(:cluster_provider_gcp, :errored) }
it 'serializes only status' do it 'serializes attrs correctly' do
expect(subject.keys).to contain_exactly(:status, :status_reason, :applications) is_expected.to contain_exactly(:status, :status_reason, :applications)
end end
end end
context 'when provider type is user' do context 'when provider type is user' do
let(:cluster) { create(:cluster, provider_type: :user) } it 'serializes attrs correctly' do
is_expected.to contain_exactly(:status, :status_reason, :applications)
it 'serializes only status' do
expect(subject.keys).to contain_exactly(:status, :status_reason, :applications)
end end
end end
end end
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
module KubernetesHelpers module KubernetesHelpers
include Gitlab::Kubernetes include Gitlab::Kubernetes
NODE_NAME = "gke-cluster-applications-default-pool-49b7f225-v527"
def kube_response(body) def kube_response(body)
{ body: body.to_json } { body: body.to_json }
end end
...@@ -11,6 +13,14 @@ module KubernetesHelpers ...@@ -11,6 +13,14 @@ module KubernetesHelpers
kube_response(kube_pods_body) kube_response(kube_pods_body)
end end
# Passes the nodes_body fixture through kube_response.
def nodes_response
  body = nodes_body
  kube_response(body)
end
# Passes the nodes_metrics_body fixture through kube_response.
def nodes_metrics_response
  body = nodes_metrics_body
  kube_response(body)
end
def kube_pod_response def kube_pod_response
kube_response(kube_pod) kube_response(kube_pod)
end end
...@@ -34,6 +44,9 @@ module KubernetesHelpers ...@@ -34,6 +44,9 @@ module KubernetesHelpers
WebMock WebMock
.stub_request(:get, api_url + '/apis/rbac.authorization.k8s.io/v1') .stub_request(:get, api_url + '/apis/rbac.authorization.k8s.io/v1')
.to_return(kube_response(kube_v1_rbac_authorization_discovery_body)) .to_return(kube_response(kube_v1_rbac_authorization_discovery_body))
WebMock
.stub_request(:get, api_url + '/apis/metrics.k8s.io/v1beta1')
.to_return(kube_response(kube_metrics_v1beta1_discovery_body))
end end
def stub_kubeclient_discover_istio(api_url) def stub_kubeclient_discover_istio(api_url)
...@@ -76,6 +89,22 @@ module KubernetesHelpers ...@@ -76,6 +89,22 @@ module KubernetesHelpers
WebMock.stub_request(:get, pods_url).to_return(response || kube_pods_response) WebMock.stub_request(:get, pods_url).to_return(response || kube_pods_response)
end end
# Stubs the base discovery endpoints for api_url, then stubs a GET on
# api_url + "/api/v1/nodes" so that it answers with nodes_response.
def stub_kubeclient_nodes(api_url)
  stub_kubeclient_discover_base(api_url)

  WebMock
    .stub_request(:get, api_url + "/api/v1/nodes")
    .to_return(nodes_response)
end
# Applies stub_kubeclient_nodes for api_url, then additionally stubs a GET on
# api_url + "/apis/metrics.k8s.io/v1beta1/nodes" so that it answers with
# nodes_metrics_response.
def stub_kubeclient_nodes_and_nodes_metrics(api_url)
  stub_kubeclient_nodes(api_url)

  WebMock
    .stub_request(:get, api_url + "/apis/metrics.k8s.io/v1beta1/nodes")
    .to_return(nodes_metrics_response)
end
def stub_kubeclient_pods(namespace, status: nil) def stub_kubeclient_pods(namespace, status: nil)
stub_kubeclient_discover(service.api_url) stub_kubeclient_discover(service.api_url)
pods_url = service.api_url + "/api/v1/namespaces/#{namespace}/pods" pods_url = service.api_url + "/api/v1/namespaces/#{namespace}/pods"
...@@ -254,6 +283,7 @@ module KubernetesHelpers ...@@ -254,6 +283,7 @@ module KubernetesHelpers
{ {
"kind" => "APIResourceList", "kind" => "APIResourceList",
"resources" => [ "resources" => [
{ "name" => "nodes", "namespaced" => false, "kind" => "Node" },
{ "name" => "pods", "namespaced" => true, "kind" => "Pod" }, { "name" => "pods", "namespaced" => true, "kind" => "Pod" },
{ "name" => "deployments", "namespaced" => true, "kind" => "Deployment" }, { "name" => "deployments", "namespaced" => true, "kind" => "Deployment" },
{ "name" => "secrets", "namespaced" => true, "kind" => "Secret" }, { "name" => "secrets", "namespaced" => true, "kind" => "Secret" },
...@@ -314,6 +344,16 @@ module KubernetesHelpers ...@@ -314,6 +344,16 @@ module KubernetesHelpers
} }
end end
# Discovery payload (an APIResourceList) listing the two resources this
# fixture advertises: cluster-scoped "nodes" and namespaced "pods".
def kube_metrics_v1beta1_discovery_body
  resources = [
    ["nodes", false, "NodeMetrics"],
    ["pods", true, "PodMetrics"]
  ].map do |name, namespaced, kind|
    { "name" => name, "namespaced" => namespaced, "kind" => kind }
  end

  { "kind" => "APIResourceList", "resources" => resources }
end
def kube_istio_discovery_body def kube_istio_discovery_body
{ {
"kind" => "APIResourceList", "kind" => "APIResourceList",
...@@ -442,6 +482,20 @@ module KubernetesHelpers ...@@ -442,6 +482,20 @@ module KubernetesHelpers
} }
end end
# A NodeList payload whose only item is the kube_node fixture.
def nodes_body
  items = [kube_node]

  { "kind" => "NodeList", "items" => items }
end
# A List payload whose only item is the kube_node_metrics fixture.
def nodes_metrics_body
  items = [kube_node_metrics]

  { "kind" => "List", "items" => items }
end
def kube_logs_body def kube_logs_body
"2019-12-13T14:04:22.123456Z Log 1\n2019-12-13T14:04:23.123456Z Log 2\n2019-12-13T14:04:24.123456Z Log 3" "2019-12-13T14:04:22.123456Z Log 1\n2019-12-13T14:04:23.123456Z Log 2\n2019-12-13T14:04:24.123456Z Log 3"
end end
...@@ -494,6 +548,40 @@ module KubernetesHelpers ...@@ -494,6 +548,40 @@ module KubernetesHelpers
} }
end end
# This is a partial response, it will have many more elements in reality but
# these are the ones we care about at the moment
def kube_node
  # Fixture values for the node's total (capacity) and allocatable resources.
  capacity = { "cpu" => "2", "memory" => "7657228Ki" }
  allocatable = { "cpu" => "1930m", "memory" => "5777164Ki" }

  {
    "metadata" => { "name" => NODE_NAME },
    "status" => { "capacity" => capacity, "allocatable" => allocatable }
  }
end
# This is a partial response, it will have many more elements in reality but
# these are the ones we care about at the moment
def kube_node_metrics
  # Fixture usage figures reported for the node named NODE_NAME.
  usage = { "cpu" => "144208668n", "memory" => "1789048Ki" }

  {
    "metadata" => { "name" => NODE_NAME },
    "usage" => usage
  }
end
# Similar to a kube_pod, but should contain a running service # Similar to a kube_pod, but should contain a running service
def kube_knative_pod(name: "kube-pod", namespace: "default", status: "Running") def kube_knative_pod(name: "kube-pod", namespace: "default", status: "Running")
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment