Commit d67b22ad authored by Gary Holtz's avatar Gary Holtz

Moving code location and adding individual specs

parent 4fc03078
......@@ -15,7 +15,7 @@ module Gitlab
# https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
ENCODING_CONFIDENCE_THRESHOLD = 50
INVALID_UTF_CHARACTER_PLACEHOLDER = "☃"
UNICODE_REPLACEMENT_CHARACTER = "�"
def encode!(message)
message = force_encode_utf8(message)
......@@ -50,14 +50,6 @@ module Gitlab
detect && detect[:type] == :binary && detect[:confidence] == 100
end
def detect_invalid_utf8?(data)
data.include?(INVALID_UTF_CHARACTER_PLACEHOLDER)
end
def fix_invalid_utf8(data)
encode_utf8(data, replace: INVALID_UTF_CHARACTER_PLACEHOLDER)
end
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior.
......@@ -75,6 +67,10 @@ module Gitlab
message.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
end
def encode_utf8_with_replacement_character(data)
encode_utf8(data, replace: UNICODE_REPLACEMENT_CHARACTER)
end
def encode_utf8(message, replace: "")
message = force_encode_utf8(message)
return message if message.valid_encoding?
......
......@@ -140,7 +140,7 @@ module Gitlab
text.start_with?(BINARY_NOTICE_PATTERN)
end
end
def initialize(raw_diff, expanded: true)
def initialize(raw_diff, expanded: true, replace_invalid_utf8_chars: true)
@expanded = expanded
case raw_diff
......@@ -157,6 +157,10 @@ module Gitlab
else
raise "Invalid raw diff type: #{raw_diff.class}"
end
if Feature.enabled?(:convert_diff_to_utf8_with_replacement_symbol, default_enabled: :yaml)
@diff = Gitlab::EncodingHelper.encode_utf8_with_replacement_character(@diff) if replace_invalid_utf8_chars && !detect_binary?(@diff)
end
end
def to_hash
......
......@@ -157,12 +157,6 @@ module Gitlab
@iterator.size == 1 || !@enforce_limits || @expanded
end
def fix_invalid_diff!(diff)
converted_diff = Gitlab::EncodingHelper.fix_invalid_utf8(diff.diff)
diff.diff = converted_diff if Gitlab::EncodingHelper.detect_invalid_utf8?(converted_diff)
end
def each_gitaly_patch
i = @array.length
......@@ -193,10 +187,6 @@ module Gitlab
diff = Gitlab::Git::Diff.new(raw, expanded: expand_diff?)
if Feature.enabled?(:convert_diff_to_utf8_with_replacement_symbol, default_enabled: :yaml)
fix_invalid_diff!(diff)
end
if !expand_diff? && over_safe_limits?(i) && diff.line_count > 0
diff.collapse!
end
......
......@@ -775,19 +775,6 @@ RSpec.describe Gitlab::Git::DiffCollection, :seed_helper do
end
end
end
context 'when diff contains invalid characters' do
let(:bad_string) { [0xae].pack("C*") }
let(:bad_string_two) { [0x89].pack("C*") }
let(:collection) do
Gitlab::Git::DiffCollection.new([{ diff: bad_string }, { diff: bad_string_two }])
end
it 'will not error out' do
expect { Oj.dump(collection) }.not_to raise_error(EncodingError)
end
end
end
def fake_diff(line_length, line_count)
......
......@@ -161,6 +161,31 @@ EOT
expect(diff).not_to have_binary_notice
end
end
context 'when diff contains invalid characters' do
let(:bad_string) { [0xae].pack("C*") }
let(:bad_string_two) { [0x89].pack("C*") }
context 'when replace_invalid_utf8_chars is true' do
let(:diff) { described_class.new(@raw_diff_hash.merge({ diff: bad_string })) }
let(:diff_two) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_two })) }
it 'will convert invalid characters and not cause an encoding error' do
expect { Oj.dump(diff) }.not_to raise_error(EncodingError)
expect { Oj.dump(diff_two) }.not_to raise_error(EncodingError)
end
end
context 'when replace_invalid_utf8_chars is false' do
let(:diff) { described_class.new(@raw_diff_hash.merge({ diff: bad_string }), replace_invalid_utf8_chars: false) }
let(:diff_two) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_two }), replace_invalid_utf8_chars: false) }
it 'will not convert any invalid characters' do
expect(diff).not_to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER)
expect(diff_two).not_to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER)
end
end
end
end
describe 'straight diffs' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment