Commit 2a2579fc authored by Ramya Authappan

Merge branch 'jmd-improve-gitaly-e2e-stop-start-times' into 'master'

Use docker pause instead of stop to minimise test downtime

See merge request gitlab-org/gitlab!79912
parents 83da1103 68ca6605
...@@ -50,6 +50,7 @@ module QA ...@@ -50,6 +50,7 @@ module QA
# Side-by-side diff rendering: old column on the left, new column on the right.
# Takes the primary node out of service via stop_node, then (new in this change)
# blocks on wait_until_node_is_removed_from_healthy_storages for that node.
# NOTE(review): the helper is not visible here; presumably it polls until the node
# is no longer reported as a healthy storage — confirm.
def stop_primary_node def stop_primary_node
stop_node(@primary_node) stop_node(@primary_node)
wait_until_node_is_removed_from_healthy_storages(@primary_node)
end end
def start_primary_node def start_primary_node
...@@ -67,6 +68,7 @@ module QA ...@@ -67,6 +68,7 @@ module QA
# Takes the secondary node out of service and waits until it is no longer
# counted as a healthy storage.
#
# The wait must target the same node that was stopped. The previous version
# passed @stop_secondary_node, an instance variable that is never assigned
# (and therefore nil), so the wait was not checking the secondary node at all.
def stop_secondary_node
  stop_node(@secondary_node)
  wait_until_node_is_removed_from_healthy_storages(@secondary_node)
end
def start_secondary_node def start_secondary_node
...@@ -75,6 +77,7 @@ module QA ...@@ -75,6 +77,7 @@ module QA
# Side-by-side diff rendering: old column on the left, new column on the right.
# Takes the tertiary node out of service via stop_node, then (new in this change)
# blocks on wait_until_node_is_removed_from_healthy_storages for that node.
# NOTE(review): the helper is not visible here; presumably it polls until the node
# is no longer reported as a healthy storage — confirm.
def stop_tertiary_node def stop_tertiary_node
stop_node(@tertiary_node) stop_node(@tertiary_node)
wait_until_node_is_removed_from_healthy_storages(@tertiary_node)
end end
def start_tertiary_node def start_tertiary_node
...@@ -82,20 +85,39 @@ module QA ...@@ -82,20 +85,39 @@ module QA
end end
# Brings the named docker container back into service and waits until docker
# reports it as running.
#
# Nodes are taken down with `docker pause` (see stop_node), so the common path
# is a cheap `docker unpause`. A container in any other non-running state is
# started with `docker start`; this also covers states that node_state does not
# classify, instead of silently skipping the start and then timing out below.
#
# @param name [String] docker container name
# @return [void] returns early (nil) when the container is already running
def start_node(name)
  case node_state(name)
  when "running"
    return
  when "paused"
    shell "docker unpause #{name}"
  else
    shell "docker start #{name}"
  end

  # Poll docker until the container's Running flag flips to true.
  wait_until_shell_command_matches(
    "docker inspect -f {{.State.Running}} #{name}",
    /true/,
    sleep_interval: 3,
    max_duration: 180,
    retry_on_exception: true
  )
end
# Takes the named docker container out of service by pausing it.
#
# Pausing (rather than stopping) keeps the container around so that
# start_node can resume it with `docker unpause`.
#
# @param name [String] docker container name
# @return whatever the `shell` helper returns for the command
def stop_node(name)
  pause_command = "docker pause #{name}"
  shell(pause_command)
end
# Classifies the docker container's current status.
#
# Runs `docker inspect` on the container and scans each output line that
# wait_until_shell_command yields, stopping at the first line that mentions
# "running" or "paused".
#
# The state is returned explicitly: previously, when no line matched, the
# method returned whatever wait_until_shell_command returned rather than
# the "stopped" default.
# NOTE(review): wait_until_shell_command is not visible here; if it retries
# until the block is truthy, a non-running container may still wait out its
# timeout before this method returns — confirm against the helper.
#
# @param name [String] docker container name
# @return [String] "running", "paused", or "stopped" (the default)
def node_state(name)
  state = "stopped"

  wait_until_shell_command("docker inspect -f {{.State.Status}} #{name}") do |line|
    QA::Runtime::Logger.debug(line)

    if line.include?("running")
      state = "running"
      break
    elsif line.include?("paused")
      state = "paused"
      break
    end
  end

  state
end
def clear_replication_queue def clear_replication_queue
QA::Runtime::Logger.info("Clearing the replication queue") QA::Runtime::Logger.info("Clearing the replication queue")
shell sql_to_docker_exec_cmd( shell sql_to_docker_exec_cmd(
...@@ -204,9 +226,8 @@ module QA ...@@ -204,9 +226,8 @@ module QA
def wait_for_praefect def wait_for_praefect
QA::Runtime::Logger.info("Waiting for health check on praefect") QA::Runtime::Logger.info("Waiting for health check on praefect")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
# praefect runs a grpc server on port 2305, which will return an error 'Connection refused' until such time it is ready wait_until_shell_command("docker exec #{@praefect} gitlab-ctl status praefect") do |line|
wait_until_shell_command("docker exec #{@gitaly_cluster} bash -c 'curl #{@praefect}:2305'") do |line| break true if line.include?('run: praefect: ')
break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
QA::Runtime::Logger.debug(line.chomp) QA::Runtime::Logger.debug(line.chomp)
end end
...@@ -269,9 +290,8 @@ module QA ...@@ -269,9 +290,8 @@ module QA
def wait_for_gitaly_health_check(node) def wait_for_gitaly_health_check(node)
QA::Runtime::Logger.info("Waiting for health check on #{node}") QA::Runtime::Logger.info("Waiting for health check on #{node}")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
# gitaly runs a grpc server on port 8075, which will return an error 'Connection refused' until such time it is ready wait_until_shell_command("docker exec #{node} gitlab-ctl status gitaly") do |line|
wait_until_shell_command("docker exec #{@praefect} bash -c 'curl #{node}:8075'") do |line| break true if line.include?('run: gitaly: ')
break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
QA::Runtime::Logger.debug(line.chomp) QA::Runtime::Logger.debug(line.chomp)
end end
......
...@@ -9,37 +9,30 @@ module QA ...@@ -9,37 +9,30 @@ module QA
project = nil project = nil
let(:intial_commit_message) { 'Initial commit' } let(:intial_commit_message) { 'Initial commit' }
let(:first_added_commit_message) { 'pushed to primary gitaly node' } let(:first_added_commit_message) { 'first_added_commit_message to primary gitaly node' }
let(:second_added_commit_message) { 'commit to failover node' } let(:second_added_commit_message) { 'second_added_commit_message to failover node' }
before(:context) do before(:context) do
# Reset the cluster in case previous tests left it in a bad state
praefect_manager.start_all_nodes praefect_manager.start_all_nodes
project = Resource::Project.fabricate! do |project| project = Resource::Project.fabricate! do |project|
project.name = "gitaly_cluster" project.name = "gitaly_cluster"
project.initialize_with_readme = true project.initialize_with_readme = true
end end
end # We need to ensure that the project is replicated to all nodes before proceeding with this test
praefect_manager.wait_for_replication(project.id)
after do
praefect_manager.start_all_nodes
end end
it 'automatically fails over', testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347830' do it 'automatically fails over', testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347830' do
# Create a new project with a commit and wait for it to replicate # stop other nodes, so we can control which node the commit is sent to
# make sure that our project is published to the 'primary' node
praefect_manager.stop_secondary_node praefect_manager.stop_secondary_node
praefect_manager.stop_tertiary_node praefect_manager.stop_tertiary_node
praefect_manager.wait_for_secondary_node_health_check_failure
praefect_manager.wait_for_tertiary_node_health_check_failure
Resource::Repository::ProjectPush.fabricate! do |push| Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project push.project = project
push.commit_message = first_added_commit_message push.commit_message = first_added_commit_message
push.new_branch = false push.new_branch = false
push.file_content = "This should exist on all nodes" push.file_content = 'This file created on gitaly1 while gitaly2/gitaly3 not running'
end end
praefect_manager.start_all_nodes praefect_manager.start_all_nodes
...@@ -56,7 +49,7 @@ module QA ...@@ -56,7 +49,7 @@ module QA
commit.add_files([ commit.add_files([
{ {
file_path: "file-#{SecureRandom.hex(8)}", file_path: "file-#{SecureRandom.hex(8)}",
content: 'This should exist on one node before reconciliation' content: 'This is created on gitaly2/gitaly3 while gitaly1 is unavailable'
} }
]) ])
end end
......
...@@ -4,7 +4,7 @@ require 'parallel' ...@@ -4,7 +4,7 @@ require 'parallel'
module QA module QA
RSpec.describe 'Create' do RSpec.describe 'Create' do
context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/346453', type: :flaky } do context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new } let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do let(:project) do
Resource::Project.fabricate! do |project| Resource::Project.fabricate! do |project|
...@@ -15,12 +15,10 @@ module QA ...@@ -15,12 +15,10 @@ module QA
before do before do
praefect_manager.start_all_nodes praefect_manager.start_all_nodes
praefect_manager.start_praefect
end end
after do after do
praefect_manager.start_all_nodes praefect_manager.start_all_nodes
praefect_manager.start_praefect
praefect_manager.clear_replication_queue praefect_manager.clear_replication_queue
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment