Commit 396b8f91 authored by Sean McGivern's avatar Sean McGivern

Fix saving diffs that are not valid UTF-8

Previously, we used Psych, which would:

1. Check if a string was encoded as binary, and not ASCII-compatible.
2. Add the !binary tag in that case.
3. Convert to base64.

We need to do the same thing, using a new column in place of the tag.
parent 0c563225
...@@ -236,10 +236,21 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -236,10 +236,21 @@ class MergeRequestDiff < ActiveRecord::Base
def create_merge_request_diff_files(diffs) def create_merge_request_diff_files(diffs)
rows = diffs.map.with_index do |diff, index| rows = diffs.map.with_index do |diff, index|
diff.to_hash.merge( diff_hash = diff.to_hash.merge(
binary: false,
merge_request_diff_id: self.id, merge_request_diff_id: self.id,
relative_order: index relative_order: index
) )
# Compatibility with old diffs created with Psych.
diff_hash.tap do |hash|
diff_text = hash[:diff]
if diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
hash[:binary] = true
hash[:diff] = [diff_text].pack('m0')
end
end
end end
Gitlab::Database.bulk_insert('merge_request_diff_files', rows) Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
...@@ -268,9 +279,7 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -268,9 +279,7 @@ class MergeRequestDiff < ActiveRecord::Base
st_diffs st_diffs
end end
elsif merge_request_diff_files.present? elsif merge_request_diff_files.present?
merge_request_diff_files merge_request_diff_files.map(&:to_hash)
.as_json(only: Gitlab::Git::Diff::SERIALIZE_KEYS)
.map(&:with_indifferent_access)
end end
end end
......
...@@ -8,4 +8,14 @@ class MergeRequestDiffFile < ActiveRecord::Base ...@@ -8,4 +8,14 @@ class MergeRequestDiffFile < ActiveRecord::Base
encode_utf8(diff) if diff.respond_to?(:encoding) encode_utf8(diff) if diff.respond_to?(:encoding)
end end
def diff
binary? ? super.unpack('m0').first : super
end
def to_hash
keys = Gitlab::Git::Diff::SERIALIZE_KEYS - [:diff]
as_json(only: keys).merge(diff: diff).with_indifferent_access
end
end end
---
title: Fix creating merge request diffs when diff contains bytes that are invalid
in UTF-8
merge_request:
author:
class AddBinaryToMergeRequestDiffFiles < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
add_column :merge_request_diff_files, :binary, :boolean
end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170724214302) do ActiveRecord::Schema.define(version: 20170725145659) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -748,6 +748,7 @@ ActiveRecord::Schema.define(version: 20170724214302) do ...@@ -748,6 +748,7 @@ ActiveRecord::Schema.define(version: 20170724214302) do
t.text "new_path", null: false t.text "new_path", null: false
t.text "old_path", null: false t.text "old_path", null: false
t.text "diff", null: false t.text "diff", null: false
t.boolean "binary"
end end
add_index "merge_request_diff_files", ["merge_request_diff_id", "relative_order"], name: "index_merge_request_diff_files_on_mr_diff_id_and_order", unique: true, using: :btree add_index "merge_request_diff_files", ["merge_request_diff_id", "relative_order"], name: "index_merge_request_diff_files_on_mr_diff_id_and_order", unique: true, using: :btree
......
...@@ -19,14 +19,14 @@ feature 'Ref switcher', feature: true, js: true do ...@@ -19,14 +19,14 @@ feature 'Ref switcher', feature: true, js: true do
input.set 'binary' input.set 'binary'
wait_for_requests wait_for_requests
expect(find('.dropdown-content ul')).to have_selector('li', count: 6) expect(find('.dropdown-content ul')).to have_selector('li', count: 7)
page.within '.dropdown-content ul' do page.within '.dropdown-content ul' do
input.native.send_keys :enter input.native.send_keys :enter
end end
end end
expect(page).to have_title 'binary-encoding' expect(page).to have_title 'add-pdf-text-binary'
end end
it "user selects ref with special characters" do it "user selects ref with special characters" do
......
...@@ -195,6 +195,7 @@ MergeRequestDiffFile: ...@@ -195,6 +195,7 @@ MergeRequestDiffFile:
- a_mode - a_mode
- b_mode - b_mode
- too_large - too_large
- binary
Ci::Pipeline: Ci::Pipeline:
- id - id
- project_id - project_id
......
require 'rails_helper' require 'rails_helper'
describe MergeRequestDiffFile, type: :model do describe MergeRequestDiffFile, type: :model do
describe '#diff' do
let(:unpacked) { 'unpacked' }
let(:packed) { [unpacked].pack('m0') }
before do
subject.diff = packed
end
context 'when the diff is marked as binary' do
before do
subject.binary = true
end
it 'unpacks from base 64' do
expect(subject.diff).to eq(unpacked)
end
end
context 'when the diff is not marked as binary' do
it 'returns the raw diff' do
expect(subject.diff).to eq(packed)
end
end
end
describe '#utf8_diff' do describe '#utf8_diff' do
it 'does not raise error when a hash value is in binary' do it 'does not raise error when the diff is binary' do
subject.diff = "\x05\x00\x68\x65\x6c\x6c\x6f" subject.diff = "\x05\x00\x68\x65\x6c\x6c\x6f"
expect { subject.utf8_diff }.not_to raise_error expect { subject.utf8_diff }.not_to raise_error
......
...@@ -105,6 +105,15 @@ describe MergeRequestDiff, models: true do ...@@ -105,6 +105,15 @@ describe MergeRequestDiff, models: true do
expect(mr_diff.empty?).to be_truthy expect(mr_diff.empty?).to be_truthy
end end
it 'saves binary diffs correctly' do
path = 'files/images/icn-time-tracking.pdf'
mr_diff = create(:merge_request, source_branch: 'add-pdf-text-binary', target_branch: 'master').merge_request_diff
diff_file = mr_diff.merge_request_diff_files.find_by(new_path: path)
expect(diff_file).to be_binary
expect(diff_file.diff).to eq(mr_diff.compare.diffs(paths: [path]).to_a.first.diff)
end
end end
describe '#commit_shas' do describe '#commit_shas' do
......
...@@ -41,7 +41,8 @@ module TestEnv ...@@ -41,7 +41,8 @@ module TestEnv
'csv' => '3dd0896', 'csv' => '3dd0896',
'v1.1.0' => 'b83d6e3', 'v1.1.0' => 'b83d6e3',
'add-ipython-files' => '93ee732', 'add-ipython-files' => '93ee732',
'add-pdf-file' => 'e774ebd' 'add-pdf-file' => 'e774ebd',
'add-pdf-text-binary' => '79faa7b'
}.freeze }.freeze
# gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily # gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment