Commit 4fc03078 authored by Gary Holtz

Adding a forced UTF-8 conversion to prevent encoding errors

Changelog: fixed
parent a9edc7fe
---
name: convert_diff_to_utf8_with_replacement_symbol
introduced_by_url:
rollout_issue_url:
milestone: '14.9'
type: development
group: group::code review
default_enabled: false
......@@ -15,6 +15,8 @@ module Gitlab
# https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
ENCODING_CONFIDENCE_THRESHOLD = 50
# Marker character for invalid UTF-8 input: fix_invalid_utf8 passes it to
# encode_utf8 as the `replace:` value, and detect_invalid_utf8? searches for it.
INVALID_UTF_CHARACTER_PLACEHOLDER = "☃"
def encode!(message)
message = force_encode_utf8(message)
return message if message.valid_encoding?
......@@ -48,6 +50,14 @@ module Gitlab
detect && detect[:type] == :binary && detect[:confidence] == 100
end
# Reports whether +data+ contains INVALID_UTF_CHARACTER_PLACEHOLDER.
#
# @param data [#include?] text to inspect
# @return [Boolean] true when the placeholder character occurs in +data+
def detect_invalid_utf8?(data)
  placeholder = INVALID_UTF_CHARACTER_PLACEHOLDER
  data.include?(placeholder)
end
# Runs +data+ through encode_utf8, asking it to use
# INVALID_UTF_CHARACTER_PLACEHOLDER as the `replace:` value.
#
# @param data the value handed to encode_utf8 unchanged
# @return whatever encode_utf8 returns
def fix_invalid_utf8(data)
  replacement = INVALID_UTF_CHARACTER_PLACEHOLDER
  encode_utf8(data, replace: replacement)
end
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior.
......
......@@ -157,6 +157,12 @@ module Gitlab
@iterator.size == 1 || !@enforce_limits || @expanded
end
# Overwrites diff.diff with the output of EncodingHelper.fix_invalid_utf8, but
# only when that output contains the invalid-character placeholder (as reported
# by EncodingHelper.detect_invalid_utf8?). Otherwise +diff+ is left untouched.
#
# @param diff [#diff, #diff=] object whose diff text is converted
# @return the converted text when it was assigned, nil otherwise
def fix_invalid_diff!(diff)
  helper = Gitlab::EncodingHelper
  sanitized = helper.fix_invalid_utf8(diff.diff)
  return unless helper.detect_invalid_utf8?(sanitized)

  diff.diff = sanitized
end
def each_gitaly_patch
i = @array.length
......@@ -187,6 +193,10 @@ module Gitlab
diff = Gitlab::Git::Diff.new(raw, expanded: expand_diff?)
if Feature.enabled?(:convert_diff_to_utf8_with_replacement_symbol, default_enabled: :yaml)
fix_invalid_diff!(diff)
end
if !expand_diff? && over_safe_limits?(i) && diff.line_count > 0
diff.collapse!
end
......
......@@ -775,6 +775,19 @@ RSpec.describe Gitlab::Git::DiffCollection, :seed_helper do
end
end
end
context 'when diff contains invalid characters' do
  # 0xae and 0x89 are lone UTF-8 continuation bytes, so each string is invalid UTF-8.
  let(:bad_string) { [0xae].pack("C*") }
  let(:bad_string_two) { [0x89].pack("C*") }

  let(:collection) do
    Gitlab::Git::DiffCollection.new([{ diff: bad_string }, { diff: bad_string_two }])
  end

  # A bare `raise_error` is used deliberately: `not_to raise_error(SomeClass)`
  # passes when any *other* error is raised, which would hide real failures.
  it 'will not error out' do
    expect { Oj.dump(collection) }.not_to raise_error
  end
end
end
def fake_diff(line_length, line_count)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment