Commit 36359d99 authored by João Alexandre Cunha's avatar João Alexandre Cunha Committed by Imre Farkas

Fetch nodes data from cluster

- Fetches nodes status [CPU and Memory: allocatable and capacity]
- Fetches nodes metrics [CPU and Memory: usage]
  - For this we had to add a new k8s API (metrics.k8s.io)
  - Metrics are only fetched if the cluster is provided by GCP,
    since other providers might not implement the metrics server
- Exposes node data to the FE via ClusterSerializer#represent_list
- Adds and refactors specs
parent d7c3d735
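
For orientation, a single entry in the cached nodes list merges the filtered node attributes with that node's usage metrics. The shape below is a sketch assembled from filter_relevant_node_attributes, filter_relevant_node_metrics_attributes and the spec fixtures further down; real API responses carry many more fields.

# Illustrative element of Clusters::Cluster#nodes once the reactive cache is populated
# (values taken from the kube_node / kube_node_metrics spec helpers below).
{
  "metadata" => {
    "name" => "gke-cluster-applications-default-pool-49b7f225-v527"
  },
  "status" => {
    "capacity" => { "cpu" => "2", "memory" => "7657228Ki" },
    "allocatable" => { "cpu" => "1930m", "memory" => "5777164Ki" }
  },
  "usage" => { "cpu" => "144208668n", "memory" => "1789048Ki" }
}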
@@ -206,10 +206,16 @@ module Clusters
end
end
def nodes
with_reactive_cache do |data|
data[:nodes]
end
end
def calculate_reactive_cache
return unless enabled?
{ connection_status: retrieve_connection_status }
{ connection_status: retrieve_connection_status, nodes: retrieve_nodes }
end
def persisted_applications
@@ -348,32 +354,55 @@ module Clusters
end
def retrieve_connection_status
kubeclient.core_client.discover
rescue *Gitlab::Kubernetes::Errors::CONNECTION
:unreachable
rescue *Gitlab::Kubernetes::Errors::AUTHENTICATION
:authentication_failure
rescue Kubeclient::HttpError => e
kubeclient_error_status(e.message)
rescue => e
Gitlab::ErrorTracking.track_exception(e, cluster_id: id)
:unknown_failure
else
:connected
end
# KubeClient uses the same error class
# For connection errors (eg. timeout) and
# for Kubernetes errors.
def kubeclient_error_status(message)
if message&.match?(/timed out|timeout/i)
:unreachable
else
:authentication_failure
result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.core_client.discover }
result[:status]
end
def retrieve_nodes
result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.get_nodes }
cluster_nodes = result[:response].to_a
result = ::Gitlab::Kubernetes::KubeClient.graceful_request(id) { kubeclient.metrics_client.get_nodes }
nodes_metrics = result[:response].to_a
cluster_nodes.inject([]) do |memo, node|
sliced_node = filter_relevant_node_attributes(node)
matched_node_metric = nodes_metrics.find { |node_metric| node_metric.metadata.name == node.metadata.name }
sliced_node_metrics = matched_node_metric ? filter_relevant_node_metrics_attributes(matched_node_metric) : {}
memo << sliced_node.merge(sliced_node_metrics)
end
end
def filter_relevant_node_attributes(node)
{
'metadata' => {
'name' => node.metadata.name
},
'status' => {
'capacity' => {
'cpu' => node.status.capacity.cpu,
'memory' => node.status.capacity.memory
},
'allocatable' => {
'cpu' => node.status.allocatable.cpu,
'memory' => node.status.allocatable.memory
}
}
}
end
def filter_relevant_node_metrics_attributes(node_metrics)
{
'usage' => {
'cpu' => node_metrics.usage.cpu,
'memory' => node_metrics.usage.memory
}
}
end
# To keep backward compatibility with AUTO_DEVOPS_DOMAIN
# environment variable, we need to ensure KUBE_INGRESS_BASE_DOMAIN
# is set if AUTO_DEVOPS_DOMAIN is set on any of the following options:
......
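
Since #nodes reads through the reactive cache, callers get nil until #calculate_reactive_cache has run and stored the node list. A minimal usage sketch, assuming cluster is an enabled Clusters::Cluster:

cluster.nodes
# => nil (cache not yet populated)

# Once the reactive cache has been populated with
# { connection_status: :connected, nodes: [...] }:
cluster.nodes
# => [{ "metadata" => { "name" => "..." }, "status" => { ... }, "usage" => { ... } }]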
@@ -7,12 +7,12 @@ class ClusterEntity < Grape::Entity
expose :enabled
expose :environment_scope
expose :name
expose :nodes
expose :status_name, as: :status
expose :status_reason
expose :applications, using: ClusterApplicationEntity
expose :path do |cluster|
Clusters::ClusterPresenter.new(cluster).show_path # rubocop: disable CodeReuse/Presenter
end
expose :applications, using: ClusterApplicationEntity
end
@@ -11,6 +11,7 @@ class ClusterSerializer < BaseSerializer
:enabled,
:environment_scope,
:name,
:nodes,
:path,
:status
]
......
@@ -20,6 +20,7 @@ module Gitlab
extensions: { group: 'apis/extensions', version: 'v1beta1' },
istio: { group: 'apis/networking.istio.io', version: 'v1alpha3' },
knative: { group: 'apis/serving.knative.dev', version: 'v1alpha1' },
metrics: { group: 'apis/metrics.k8s.io', version: 'v1beta1' },
networking: { group: 'apis/networking.k8s.io', version: 'v1' }
}.freeze
@@ -34,7 +35,8 @@ module Gitlab
end
# Core API methods delegates to the core api group client
delegate :get_pods,
delegate :get_nodes,
:get_pods,
:get_secrets,
:get_config_map,
:get_namespace,
@@ -102,6 +104,31 @@ module Gitlab
}
}.freeze
def self.graceful_request(cluster_id)
{ status: :connected, response: yield }
rescue *Gitlab::Kubernetes::Errors::CONNECTION
{ status: :unreachable }
rescue *Gitlab::Kubernetes::Errors::AUTHENTICATION
{ status: :authentication_failure }
rescue Kubeclient::HttpError => e
{ status: kubeclient_error_status(e.message) }
rescue => e
Gitlab::ErrorTracking.track_exception(e, cluster_id: cluster_id)
{ status: :unknown_failure }
end
# KubeClient uses the same error class
# For connection errors (eg. timeout) and
# for Kubernetes errors.
def self.kubeclient_error_status(message)
if message&.match?(/timed out|timeout/i)
:unreachable
else
:authentication_failure
end
end
# We disable redirects through 'http_max_redirects: 0',
# so that KubeClient does not follow redirects and
# expose internal services.
......
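
As a rough illustration, any Kubeclient call can now be wrapped in KubeClient.graceful_request so that connection, authentication and unknown failures become a status symbol instead of an exception (cluster and kubeclient below are placeholders for a cluster record and its configured client):

result = Gitlab::Kubernetes::KubeClient.graceful_request(cluster.id) do
  kubeclient.get_nodes
end

# On success:  { status: :connected, response: <list of node resources> }
# On failure:  { status: :unreachable | :authentication_failure | :unknown_failure }
nodes = result[:response].to_a # nil.to_a == [] when the request failed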
@@ -64,6 +64,45 @@ describe Gitlab::Kubernetes::KubeClient do
end
end
describe '.graceful_request' do
context 'successful' do
before do
allow(client).to receive(:foo).and_return(true)
end
it 'returns connected status and foo response' do
result = described_class.graceful_request(1) { client.foo }
expect(result).to eq({ status: :connected, response: true })
end
end
context 'errored' do
using RSpec::Parameterized::TableSyntax
where(:error, :error_status) do
SocketError | :unreachable
OpenSSL::X509::CertificateError | :authentication_failure
StandardError | :unknown_failure
Kubeclient::HttpError.new(408, "timed out", nil) | :unreachable
Kubeclient::HttpError.new(408, "timeout", nil) | :unreachable
Kubeclient::HttpError.new(408, "", nil) | :authentication_failure
end
with_them do
before do
allow(client).to receive(:foo).and_raise(error)
end
it 'returns error status' do
result = described_class.graceful_request(1) { client.foo }
expect(result).to eq({ status: error_status })
end
end
end
end
describe '#initialize' do
shared_examples 'local address' do
it 'blocks local addresses' do
@@ -188,10 +227,25 @@ describe Gitlab::Kubernetes::KubeClient do
end
end
describe '#metrics_client' do
subject { client.metrics_client }
it_behaves_like 'a Kubeclient'
it 'has the metrics API group endpoint' do
expect(subject.api_endpoint.to_s).to match(%r{\/apis\/metrics.k8s.io\Z})
end
it 'has the api_version' do
expect(subject.instance_variable_get(:@api_version)).to eq('v1beta1')
end
end
describe 'core API' do
let(:core_client) { client.core_client }
[
:get_nodes,
:get_pods,
:get_secrets,
:get_config_map,
......
@@ -948,6 +948,22 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end
end
describe '#nodes' do
let(:cluster) { create(:cluster) }
subject { cluster.nodes }
it { is_expected.to be_nil }
context 'with a cached status' do
before do
stub_reactive_cache(cluster, nodes: [kube_node])
end
it { is_expected.to eq([kube_node]) }
end
end
describe '#calculate_reactive_cache' do
subject { cluster.calculate_reactive_cache }
@@ -956,6 +972,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
it 'does not populate the cache' do
expect(cluster).not_to receive(:retrieve_connection_status)
expect(cluster).not_to receive(:retrieve_nodes)
is_expected.to be_nil
end
@@ -964,12 +981,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
context 'cluster is enabled' do
let(:cluster) { create(:cluster, :provided_by_user, :group) }
context 'connection to the cluster is successful' do
before do
stub_kubeclient_discover(cluster.platform.api_url)
end
before do
stub_kubeclient_nodes_and_nodes_metrics(cluster.platform.api_url)
end
it { is_expected.to eq(connection_status: :connected) }
context 'connection to the cluster is successful' do
it { is_expected.to eq(connection_status: :connected, nodes: [kube_node.merge(kube_node_metrics)]) }
end
context 'cluster cannot be reached' do
@@ -978,7 +995,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(SocketError)
end
it { is_expected.to eq(connection_status: :unreachable) }
it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end
context 'cluster cannot be authenticated to' do
@@ -987,7 +1004,7 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(OpenSSL::X509::CertificateError.new("Certificate error"))
end
it { is_expected.to eq(connection_status: :authentication_failure) }
it { is_expected.to eq(connection_status: :authentication_failure, nodes: []) }
end
describe 'Kubeclient::HttpError' do
@@ -999,18 +1016,18 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(Kubeclient::HttpError.new(error_code, error_message, nil))
end
it { is_expected.to eq(connection_status: :authentication_failure) }
it { is_expected.to eq(connection_status: :authentication_failure, nodes: []) }
context 'generic timeout' do
let(:error_message) { 'Timed out connecting to server'}
it { is_expected.to eq(connection_status: :unreachable) }
it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end
context 'gateway timeout' do
let(:error_message) { '504 Gateway Timeout for GET https://kubernetes.example.com/api/v1'}
it { is_expected.to eq(connection_status: :unreachable) }
it { is_expected.to eq(connection_status: :unreachable, nodes: []) }
end
end
@@ -1020,11 +1037,12 @@ describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
.and_raise(StandardError)
end
it { is_expected.to eq(connection_status: :unknown_failure) }
it { is_expected.to eq(connection_status: :unknown_failure, nodes: []) }
it 'notifies Sentry' do
expect(Gitlab::ErrorTracking).to receive(:track_exception)
.with(instance_of(StandardError), hash_including(cluster_id: cluster.id))
.twice
subject
end
......
@@ -3,23 +3,41 @@
require 'spec_helper'
describe ClusterSerializer do
let(:cluster) { create(:cluster, :project, provider_type: :user) }
describe '#represent_list' do
subject { described_class.new.represent_list(cluster).keys }
it 'serializes attrs correctly' do
is_expected.to contain_exactly(
:cluster_type,
:enabled,
:environment_scope,
:name,
:nodes,
:path,
:status)
end
end
describe '#represent_status' do
subject { described_class.new.represent_status(cluster) }
subject { described_class.new.represent_status(cluster).keys }
context 'when provider type is gcp and cluster is errored' do
let(:cluster) do
errored_provider = create(:cluster_provider_gcp, :errored)
context 'when provider type is gcp' do
let(:cluster) { create(:cluster, provider_type: :gcp, provider_gcp: provider) }
let(:provider) { create(:cluster_provider_gcp, :errored) }
create(:cluster, provider_type: :gcp, provider_gcp: errored_provider)
end
it 'serializes only status' do
expect(subject.keys).to contain_exactly(:status, :status_reason, :applications)
it 'serializes attrs correctly' do
is_expected.to contain_exactly(:status, :status_reason, :applications)
end
end
context 'when provider type is user' do
let(:cluster) { create(:cluster, provider_type: :user) }
it 'serializes only status' do
expect(subject.keys).to contain_exactly(:status, :status_reason, :applications)
it 'serializes attrs correctly' do
is_expected.to contain_exactly(:status, :status_reason, :applications)
end
end
end
......
@@ -3,6 +3,8 @@
module KubernetesHelpers
include Gitlab::Kubernetes
NODE_NAME = "gke-cluster-applications-default-pool-49b7f225-v527"
def kube_response(body)
{ body: body.to_json }
end
@@ -11,6 +13,14 @@ module KubernetesHelpers
kube_response(kube_pods_body)
end
def nodes_response
kube_response(nodes_body)
end
def nodes_metrics_response
kube_response(nodes_metrics_body)
end
def kube_pod_response
kube_response(kube_pod)
end
@@ -34,6 +44,9 @@ module KubernetesHelpers
WebMock
.stub_request(:get, api_url + '/apis/rbac.authorization.k8s.io/v1')
.to_return(kube_response(kube_v1_rbac_authorization_discovery_body))
WebMock
.stub_request(:get, api_url + '/apis/metrics.k8s.io/v1beta1')
.to_return(kube_response(kube_metrics_v1beta1_discovery_body))
end
def stub_kubeclient_discover_istio(api_url)
@@ -76,6 +89,22 @@ module KubernetesHelpers
WebMock.stub_request(:get, pods_url).to_return(response || kube_pods_response)
end
def stub_kubeclient_nodes(api_url)
stub_kubeclient_discover_base(api_url)
nodes_url = api_url + "/api/v1/nodes"
WebMock.stub_request(:get, nodes_url).to_return(nodes_response)
end
def stub_kubeclient_nodes_and_nodes_metrics(api_url)
stub_kubeclient_nodes(api_url)
nodes_url = api_url + "/apis/metrics.k8s.io/v1beta1/nodes"
WebMock.stub_request(:get, nodes_url).to_return(nodes_metrics_response)
end
def stub_kubeclient_pods(namespace, status: nil)
stub_kubeclient_discover(service.api_url)
pods_url = service.api_url + "/api/v1/namespaces/#{namespace}/pods"
@@ -254,6 +283,7 @@ module KubernetesHelpers
{
"kind" => "APIResourceList",
"resources" => [
{ "name" => "nodes", "namespaced" => false, "kind" => "Node" },
{ "name" => "pods", "namespaced" => true, "kind" => "Pod" },
{ "name" => "deployments", "namespaced" => true, "kind" => "Deployment" },
{ "name" => "secrets", "namespaced" => true, "kind" => "Secret" },
@@ -314,6 +344,16 @@ module KubernetesHelpers
}
end
def kube_metrics_v1beta1_discovery_body
{
"kind" => "APIResourceList",
"resources" => [
{ "name" => "nodes", "namespaced" => false, "kind" => "NodeMetrics" },
{ "name" => "pods", "namespaced" => true, "kind" => "PodMetrics" }
]
}
end
def kube_istio_discovery_body
{
"kind" => "APIResourceList",
@@ -442,6 +482,20 @@ module KubernetesHelpers
}
end
def nodes_body
{
"kind" => "NodeList",
"items" => [kube_node]
}
end
def nodes_metrics_body
{
"kind" => "List",
"items" => [kube_node_metrics]
}
end
def kube_logs_body
"2019-12-13T14:04:22.123456Z Log 1\n2019-12-13T14:04:23.123456Z Log 2\n2019-12-13T14:04:24.123456Z Log 3"
end
@@ -494,6 +548,40 @@ module KubernetesHelpers
}
end
# This is a partial response, it will have many more elements in reality but
# these are the ones we care about at the moment
def kube_node
{
"metadata" => {
"name" => NODE_NAME
},
"status" => {
"capacity" => {
"cpu" => "2",
"memory" => "7657228Ki"
},
"allocatable" => {
"cpu" => "1930m",
"memory" => "5777164Ki"
}
}
}
end
# This is a partial response, it will have many more elements in reality but
# these are the ones we care about at the moment
def kube_node_metrics
{
"metadata" => {
"name" => NODE_NAME
},
"usage" => {
"cpu" => "144208668n",
"memory" => "1789048Ki"
}
}
end
# Similar to a kube_pod, but should contain a running service
def kube_knative_pod(name: "kube-pod", namespace: "default", status: "Running")
{
......
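
For reference, a spec that needs node data could combine the new helpers roughly like this; this is a sketch only, assuming cluster is an enabled cluster created as in the model spec above:

before do
  stub_kubeclient_nodes_and_nodes_metrics(cluster.platform.api_url)
end

it 'caches the node merged with its metrics' do
  expect(cluster.calculate_reactive_cache[:nodes]).to eq([kube_node.merge(kube_node_metrics)])
end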