Commit 6a66e4a1 authored by Dylan Griffith's avatar Dylan Griffith

Don't remove failed install pods

We want to keep failed install pods around so that it is easier to debug
why a failure occured. With this change we also need to ensure that we
remove a previous pod with the same name before installing so that
re-install does not fail.

Another change here is that we no longer need to catch errors from
delete_pod! in CheckInstallationProgressService as we now catch the
ResourceNotFoundError in Helm::Api. The catch statement in
CheckInstallationProgressService was also probably too broad before and
should have been narrowed down simply to ResourceNotFoundError.
parent 242baf12
...@@ -29,17 +29,13 @@ module Clusters ...@@ -29,17 +29,13 @@ module Clusters
end end
def on_failed def on_failed
app.make_errored!('Installation failed') app.make_errored!("Installation failed. Check pod logs for #{install_command.pod_name} for more details.")
ensure
remove_installation_pod
end end
def check_timeout def check_timeout
if timeouted? if timeouted?
begin begin
app.make_errored!('Installation timed out') app.make_errored!("Installation timed out. Check pod logs for #{install_command.pod_name} for more details.")
ensure
remove_installation_pod
end end
else else
ClusterWaitForAppInstallationWorker.perform_in( ClusterWaitForAppInstallationWorker.perform_in(
...@@ -53,9 +49,6 @@ module Clusters ...@@ -53,9 +49,6 @@ module Clusters
def remove_installation_pod def remove_installation_pod
helm_api.delete_pod!(install_command.pod_name) helm_api.delete_pod!(install_command.pod_name)
rescue => e
Rails.logger.error("Kubernetes error: #{e.class.name} #{e.message}")
# no-op
end end
def installation_phase def installation_phase
......
---
title: Don't remove failed install pods after installing GitLab managed applications
merge_request: 23350
author:
type: changed
...@@ -16,12 +16,16 @@ module Gitlab ...@@ -16,12 +16,16 @@ module Gitlab
create_cluster_role_binding(command) create_cluster_role_binding(command)
create_config_map(command) create_config_map(command)
delete_pod!(command.pod_name)
kubeclient.create_pod(command.pod_resource) kubeclient.create_pod(command.pod_resource)
end end
def update(command) def update(command)
namespace.ensure_exists! namespace.ensure_exists!
update_config_map(command) update_config_map(command)
delete_pod!(command.pod_name)
kubeclient.create_pod(command.pod_resource) kubeclient.create_pod(command.pod_resource)
end end
...@@ -42,6 +46,8 @@ module Gitlab ...@@ -42,6 +46,8 @@ module Gitlab
def delete_pod!(pod_name) def delete_pod!(pod_name)
kubeclient.delete_pod(pod_name, namespace.name) kubeclient.delete_pod(pod_name, namespace.name)
rescue ::Kubeclient::ResourceNotFoundError
# no-op
end end
def get_config_map(config_map_name) def get_config_map(config_map_name)
......
...@@ -40,6 +40,7 @@ describe Gitlab::Kubernetes::Helm::Api do ...@@ -40,6 +40,7 @@ describe Gitlab::Kubernetes::Helm::Api do
allow(client).to receive(:create_config_map).and_return(nil) allow(client).to receive(:create_config_map).and_return(nil)
allow(client).to receive(:create_service_account).and_return(nil) allow(client).to receive(:create_service_account).and_return(nil)
allow(client).to receive(:create_cluster_role_binding).and_return(nil) allow(client).to receive(:create_cluster_role_binding).and_return(nil)
allow(client).to receive(:delete_pod).and_return(nil)
allow(namespace).to receive(:ensure_exists!).once allow(namespace).to receive(:ensure_exists!).once
end end
...@@ -50,6 +51,13 @@ describe Gitlab::Kubernetes::Helm::Api do ...@@ -50,6 +51,13 @@ describe Gitlab::Kubernetes::Helm::Api do
subject.install(command) subject.install(command)
end end
it 'removes an existing pod before installing' do
expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once.ordered
expect(client).to receive(:create_pod).once.ordered
subject.install(command)
end
context 'with a ConfigMap' do context 'with a ConfigMap' do
let(:resource) { Gitlab::Kubernetes::ConfigMap.new(application_name, files).generate } let(:resource) { Gitlab::Kubernetes::ConfigMap.new(application_name, files).generate }
...@@ -180,6 +188,7 @@ describe Gitlab::Kubernetes::Helm::Api do ...@@ -180,6 +188,7 @@ describe Gitlab::Kubernetes::Helm::Api do
allow(client).to receive(:update_config_map).and_return(nil) allow(client).to receive(:update_config_map).and_return(nil)
allow(client).to receive(:create_pod).and_return(nil) allow(client).to receive(:create_pod).and_return(nil)
allow(client).to receive(:delete_pod).and_return(nil)
end end
it 'ensures the namespace exists before creating the pod' do it 'ensures the namespace exists before creating the pod' do
...@@ -189,6 +198,13 @@ describe Gitlab::Kubernetes::Helm::Api do ...@@ -189,6 +198,13 @@ describe Gitlab::Kubernetes::Helm::Api do
subject.update(command) subject.update(command)
end end
it 'removes an existing pod before updating' do
expect(client).to receive(:delete_pod).with('upgrade-app-name', 'gitlab-managed-apps').once.ordered
expect(client).to receive(:create_pod).once.ordered
subject.update(command)
end
it 'updates the config map on kubeclient when one exists' do it 'updates the config map on kubeclient when one exists' do
resource = Gitlab::Kubernetes::ConfigMap.new( resource = Gitlab::Kubernetes::ConfigMap.new(
application_name, files application_name, files
...@@ -224,9 +240,18 @@ describe Gitlab::Kubernetes::Helm::Api do ...@@ -224,9 +240,18 @@ describe Gitlab::Kubernetes::Helm::Api do
describe '#delete_pod!' do describe '#delete_pod!' do
it 'deletes the POD from kubernetes cluster' do it 'deletes the POD from kubernetes cluster' do
expect(client).to receive(:delete_pod).with(command.pod_name, gitlab_namespace).once expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once
subject.delete_pod!(command.pod_name) subject.delete_pod!('install-app-name')
end
context 'when the resource being deleted does not exist' do
it 'catches the error' do
expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps')
.and_raise(Kubeclient::ResourceNotFoundError.new(404, 'Not found', nil))
subject.delete_pod!('install-app-name')
end
end end
end end
......
...@@ -8,14 +8,6 @@ describe Clusters::Applications::CheckInstallationProgressService do ...@@ -8,14 +8,6 @@ describe Clusters::Applications::CheckInstallationProgressService do
let(:phase) { Gitlab::Kubernetes::Pod::UNKNOWN } let(:phase) { Gitlab::Kubernetes::Pod::UNKNOWN }
let(:errors) { nil } let(:errors) { nil }
shared_examples 'a terminated installation' do
it 'removes the installation POD' do
expect(service).to receive(:remove_installation_pod).once
service.execute
end
end
shared_examples 'a not yet terminated installation' do |a_phase| shared_examples 'a not yet terminated installation' do |a_phase|
let(:phase) { a_phase } let(:phase) { a_phase }
...@@ -39,15 +31,13 @@ describe Clusters::Applications::CheckInstallationProgressService do ...@@ -39,15 +31,13 @@ describe Clusters::Applications::CheckInstallationProgressService do
context 'when timeouted' do context 'when timeouted' do
let(:application) { create(:clusters_applications_helm, :timeouted) } let(:application) { create(:clusters_applications_helm, :timeouted) }
it_behaves_like 'a terminated installation'
it 'make the application errored' do it 'make the application errored' do
expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in) expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in)
service.execute service.execute
expect(application).to be_errored expect(application).to be_errored
expect(application.status_reason).to match(/\btimed out\b/) expect(application.status_reason).to eq("Installation timed out. Check pod logs for install-helm for more details.")
end end
end end
end end
...@@ -66,7 +56,11 @@ describe Clusters::Applications::CheckInstallationProgressService do ...@@ -66,7 +56,11 @@ describe Clusters::Applications::CheckInstallationProgressService do
expect(service).to receive(:installation_phase).once.and_return(phase) expect(service).to receive(:installation_phase).once.and_return(phase)
end end
it_behaves_like 'a terminated installation' it 'removes the installation POD' do
expect(service).to receive(:remove_installation_pod).once
service.execute
end
it 'make the application installed' do it 'make the application installed' do
expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in) expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in)
...@@ -86,13 +80,11 @@ describe Clusters::Applications::CheckInstallationProgressService do ...@@ -86,13 +80,11 @@ describe Clusters::Applications::CheckInstallationProgressService do
expect(service).to receive(:installation_phase).once.and_return(phase) expect(service).to receive(:installation_phase).once.and_return(phase)
end end
it_behaves_like 'a terminated installation'
it 'make the application errored' do it 'make the application errored' do
service.execute service.execute
expect(application).to be_errored expect(application).to be_errored
expect(application.status_reason).to eq("Installation failed") expect(application.status_reason).to eq("Installation failed. Check pod logs for install-helm for more details.")
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment