Commit 73e53b95 authored by Imre Farkas

Merge branch 'sh-fix-invalid-job-traces-utf8' into 'master'

Drop invalid UTF-8 when displaying in job logs

See merge request gitlab-org/gitlab!66423
parents 53744f17 c481f5a6
......@@ -21,14 +21,19 @@ module Gitlab
end
# Builds the Hash representation of this object.
#
# The result always carries :text, obtained by passing +text+ through
# +encode_text+. A :style entry (the string form of +style+) is added
# only when +style.set?+ is truthy.
#
# @return [Hash] +{ text: ... }+, plus +:style+ when a style is set
def to_h
  { text: encode_text(text) }.tap do |result|
    result[:style] = style.to_s if style.set?
  end
end
# Produces a valid UTF-8 string from +text+ so that a Hash holding it
# can be serialized to JSON. Left untouched, such data can raise e.g.
#   Encoding::UndefinedConversionError:
#     "\xE2" from ASCII-8BIT to UTF-8
#
# +text+ itself is re-tagged as UTF-8 in place (force_encoding changes
# the receiver); the returned string is a copy in which every invalid
# byte sequence has been swapped for the replacement character.
def encode_text(text)
  utf8_text = text.force_encoding(Encoding::UTF_8)
  utf8_text.scrub
end
end
attr_reader :offset, :sections, :segments, :current_segment,
......
......@@ -32,4 +32,14 @@ RSpec.describe Ci::BuildTrace do
{ offset: 0, content: [{ text: 'the-stream' }] }
])
end
context 'with invalid UTF-8 data' do
  # The second line ends in "\xF0\x9F\x90": the lead byte of a four-byte
  # UTF-8 sequence followed by only two continuation bytes.
  let(:data) { StringIO.new("UTF-8 dashes here: ───\n🐤🐤🐤🐤\xF0\x9F\x90\n") }

  it 'returns valid UTF-8 data', :aggregate_failures do
    dashes_line = { offset: 0, content: [{ text: 'UTF-8 dashes here: ───' }] }
    # Line two begins at byte 29: 19 single-byte characters, three
    # 3-byte dashes (9 bytes) and the newline (1 byte) come before it.
    scrubbed_line = { offset: 29, content: [{ text: '🐤🐤🐤🐤�' }] }

    expect(subject.lines[0]).to eq(dashes_line)
    expect(subject.lines[1]).to eq(scrubbed_line)
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment