Commit fd392cd7 authored by Stan Hu's avatar Stan Hu

Avoid process deadlock in popen by consuming input pipes

A process that spews a lot of output to stderr or stdout could stall out
due to the pipe buffer being full. As described in https://bugs.ruby-lang.org/issues/9082,
we can use the trick used in Ruby's capture3 function to read the pipes in separate
threads.

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/6895
parent 226d4d0e
---
title: Avoid process deadlock in popen by consuming input pipes
merge_request: 20600
author:
type: fixed
...@@ -21,6 +21,10 @@ module Gitlab ...@@ -21,6 +21,10 @@ module Gitlab
Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr| Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr|
stdout.set_encoding(Encoding::ASCII_8BIT) stdout.set_encoding(Encoding::ASCII_8BIT)
# stderr and stdout pipes can block if stderr/stdout aren't drained: https://bugs.ruby-lang.org/issues/9082
# Mimic what Ruby does with capture3: https://github.com/ruby/ruby/blob/1ec544695fa02d714180ef9c34e755027b6a2103/lib/open3.rb#L257-L273
err_reader = Thread.new { stderr.read }
yield(stdin) if block_given? yield(stdin) if block_given?
stdin.close stdin.close
...@@ -32,7 +36,7 @@ module Gitlab ...@@ -32,7 +36,7 @@ module Gitlab
cmd_output << stdout.read cmd_output << stdout.read
end end
cmd_output << stderr.read cmd_output << err_reader.value
cmd_status = wait_thr.value.exitstatus cmd_status = wait_thr.value.exitstatus
end end
...@@ -55,16 +59,20 @@ module Gitlab ...@@ -55,16 +59,20 @@ module Gitlab
rerr, werr = IO.pipe rerr, werr = IO.pipe
pid = Process.spawn(vars, *cmd, out: wout, err: werr, chdir: path, pgroup: true) pid = Process.spawn(vars, *cmd, out: wout, err: werr, chdir: path, pgroup: true)
# stderr and stdout pipes can block if stderr/stdout aren't drained: https://bugs.ruby-lang.org/issues/9082
# Mimic what Ruby does with capture3: https://github.com/ruby/ruby/blob/1ec544695fa02d714180ef9c34e755027b6a2103/lib/open3.rb#L257-L273
out_reader = Thread.new { rout.read }
err_reader = Thread.new { rerr.read }
begin begin
status = process_wait_with_timeout(pid, timeout)
# close write ends so we could read them # close write ends so we could read them
wout.close wout.close
werr.close werr.close
cmd_output = rout.readlines.join status = process_wait_with_timeout(pid, timeout)
cmd_output << rerr.readlines.join # Copying the behaviour of `popen` which merges stderr into output
cmd_output = out_reader.value
cmd_output << err_reader.value # Copying the behaviour of `popen` which merges stderr into output
[cmd_output, status.exitstatus] [cmd_output, status.exitstatus]
rescue Timeout::Error => e rescue Timeout::Error => e
......
...@@ -34,11 +34,16 @@ module Gitlab ...@@ -34,11 +34,16 @@ module Gitlab
start = Time.now start = Time.now
Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr| Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr|
# stderr and stdout pipes can block if stderr/stdout aren't drained: https://bugs.ruby-lang.org/issues/9082
# Mimic what Ruby does with capture3: https://github.com/ruby/ruby/blob/1ec544695fa02d714180ef9c34e755027b6a2103/lib/open3.rb#L257-L273
out_reader = Thread.new { stdout.read }
err_reader = Thread.new { stderr.read }
yield(stdin) if block_given? yield(stdin) if block_given?
stdin.close stdin.close
cmd_stdout = stdout.read cmd_stdout = out_reader.value
cmd_stderr = stderr.read cmd_stderr = err_reader.value
cmd_status = wait_thr.value cmd_status = wait_thr.value
end end
......
...@@ -2,6 +2,9 @@ require 'spec_helper' ...@@ -2,6 +2,9 @@ require 'spec_helper'
describe 'Gitlab::Git::Popen' do describe 'Gitlab::Git::Popen' do
let(:path) { Rails.root.join('tmp').to_s } let(:path) { Rails.root.join('tmp').to_s }
let(:test_string) { 'The quick brown fox jumped over the lazy dog' }
# The pipe buffer is typically 64K. This string is about 440K.
let(:spew_command) { ['bash', '-c', "for i in {1..10000}; do echo '#{test_string}' 1>&2; done"] }
let(:klass) do let(:klass) do
Class.new(Object) do Class.new(Object) do
...@@ -70,6 +73,15 @@ describe 'Gitlab::Git::Popen' do ...@@ -70,6 +73,15 @@ describe 'Gitlab::Git::Popen' do
end end
end end
end end
context 'with a process that writes a lot of data to stderr' do
it 'returns zero' do
output, status = klass.new.popen(spew_command, path)
expect(output).to include(test_string)
expect(status).to eq(0)
end
end
end end
context 'popen_with_timeout' do context 'popen_with_timeout' do
...@@ -85,6 +97,17 @@ describe 'Gitlab::Git::Popen' do ...@@ -85,6 +97,17 @@ describe 'Gitlab::Git::Popen' do
it { expect(output).to include('tests') } it { expect(output).to include('tests') }
end end
context 'multi-line string' do
let(:test_string) { "this is 1 line\n2nd line\n3rd line\n" }
let(:result) { klass.new.popen_with_timeout(['echo', test_string], timeout, path) }
let(:output) { result.first }
let(:status) { result.last }
it { expect(status).to be_zero }
# echo adds its own line
it { expect(output).to eq(test_string + "\n") }
end
context 'non-zero status' do context 'non-zero status' do
let(:result) { klass.new.popen_with_timeout(%w(cat NOTHING), timeout, path) } let(:result) { klass.new.popen_with_timeout(%w(cat NOTHING), timeout, path) }
let(:output) { result.first } let(:output) { result.first }
...@@ -110,6 +133,13 @@ describe 'Gitlab::Git::Popen' do ...@@ -110,6 +133,13 @@ describe 'Gitlab::Git::Popen' do
it "handles processes that do not shutdown correctly" do it "handles processes that do not shutdown correctly" do
expect { klass.new.popen_with_timeout(['bash', '-c', "trap -- '' SIGTERM; sleep 1000"], timeout, path) }.to raise_error(Timeout::Error) expect { klass.new.popen_with_timeout(['bash', '-c', "trap -- '' SIGTERM; sleep 1000"], timeout, path) }.to raise_error(Timeout::Error)
end end
it 'handles process that writes a lot of data to stderr' do
output, status = klass.new.popen_with_timeout(spew_command, timeout, path)
expect(output).to include(test_string)
expect(status).to eq(0)
end
end end
context 'timeout period' do context 'timeout period' do
......
...@@ -55,6 +55,19 @@ describe Gitlab::Popen do ...@@ -55,6 +55,19 @@ describe Gitlab::Popen do
end end
end end
context 'with a process that writes a lot of data to stderr' do
let(:test_string) { 'The quick brown fox jumped over the lazy dog' }
# The pipe buffer is typically 64K. This string is about 440K.
let(:spew_command) { ['bash', '-c', "for i in {1..10000}; do echo '#{test_string}' 1>&2; done"] }
it 'returns zero' do
output, status = @klass.new.popen(spew_command, path)
expect(output).to include(test_string)
expect(status).to eq(0)
end
end
context 'without a directory argument' do context 'without a directory argument' do
before do before do
@output, @status = @klass.new.popen(%w(ls)) @output, @status = @klass.new.popen(%w(ls))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment