Commit 396b8f91 authored by Sean McGivern's avatar Sean McGivern

Fix saving diffs that are not valid UTF-8

Previously, we used Psych, which would:

1. Check if a string was encoded as binary, and not ASCII-compatible.
2. Add the !binary tag in that case.
3. Convert to base64.

We need to do the same thing, using a new column in place of the tag.
parent 0c563225
......@@ -236,10 +236,21 @@ class MergeRequestDiff < ActiveRecord::Base
def create_merge_request_diff_files(diffs)
rows = diffs.map.with_index do |diff, index|
diff.to_hash.merge(
diff_hash = diff.to_hash.merge(
binary: false,
merge_request_diff_id: self.id,
relative_order: index
)
# Compatibility with old diffs created with Psych.
diff_hash.tap do |hash|
diff_text = hash[:diff]
if diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
hash[:binary] = true
hash[:diff] = [diff_text].pack('m0')
end
end
end
Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
......@@ -268,9 +279,7 @@ class MergeRequestDiff < ActiveRecord::Base
st_diffs
end
elsif merge_request_diff_files.present?
merge_request_diff_files
.as_json(only: Gitlab::Git::Diff::SERIALIZE_KEYS)
.map(&:with_indifferent_access)
merge_request_diff_files.map(&:to_hash)
end
end
......
......@@ -8,4 +8,14 @@ class MergeRequestDiffFile < ActiveRecord::Base
encode_utf8(diff) if diff.respond_to?(:encoding)
end
def diff
binary? ? super.unpack('m0').first : super
end
def to_hash
keys = Gitlab::Git::Diff::SERIALIZE_KEYS - [:diff]
as_json(only: keys).merge(diff: diff).with_indifferent_access
end
end
---
title: Fix creating merge request diffs when diff contains bytes that are invalid
in UTF-8
merge_request:
author:
class AddBinaryToMergeRequestDiffFiles < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
add_column :merge_request_diff_files, :binary, :boolean
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170724214302) do
ActiveRecord::Schema.define(version: 20170725145659) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -748,6 +748,7 @@ ActiveRecord::Schema.define(version: 20170724214302) do
t.text "new_path", null: false
t.text "old_path", null: false
t.text "diff", null: false
t.boolean "binary"
end
add_index "merge_request_diff_files", ["merge_request_diff_id", "relative_order"], name: "index_merge_request_diff_files_on_mr_diff_id_and_order", unique: true, using: :btree
......
......@@ -19,14 +19,14 @@ feature 'Ref switcher', feature: true, js: true do
input.set 'binary'
wait_for_requests
expect(find('.dropdown-content ul')).to have_selector('li', count: 6)
expect(find('.dropdown-content ul')).to have_selector('li', count: 7)
page.within '.dropdown-content ul' do
input.native.send_keys :enter
end
end
expect(page).to have_title 'binary-encoding'
expect(page).to have_title 'add-pdf-text-binary'
end
it "user selects ref with special characters" do
......
......@@ -195,6 +195,7 @@ MergeRequestDiffFile:
- a_mode
- b_mode
- too_large
- binary
Ci::Pipeline:
- id
- project_id
......
require 'rails_helper'
describe MergeRequestDiffFile, type: :model do
describe '#diff' do
let(:unpacked) { 'unpacked' }
let(:packed) { [unpacked].pack('m0') }
before do
subject.diff = packed
end
context 'when the diff is marked as binary' do
before do
subject.binary = true
end
it 'unpacks from base 64' do
expect(subject.diff).to eq(unpacked)
end
end
context 'when the diff is not marked as binary' do
it 'returns the raw diff' do
expect(subject.diff).to eq(packed)
end
end
end
describe '#utf8_diff' do
it 'does not raise error when a hash value is in binary' do
it 'does not raise error when the diff is binary' do
subject.diff = "\x05\x00\x68\x65\x6c\x6c\x6f"
expect { subject.utf8_diff }.not_to raise_error
......
......@@ -105,6 +105,15 @@ describe MergeRequestDiff, models: true do
expect(mr_diff.empty?).to be_truthy
end
it 'saves binary diffs correctly' do
path = 'files/images/icn-time-tracking.pdf'
mr_diff = create(:merge_request, source_branch: 'add-pdf-text-binary', target_branch: 'master').merge_request_diff
diff_file = mr_diff.merge_request_diff_files.find_by(new_path: path)
expect(diff_file).to be_binary
expect(diff_file.diff).to eq(mr_diff.compare.diffs(paths: [path]).to_a.first.diff)
end
end
describe '#commit_shas' do
......
......@@ -41,7 +41,8 @@ module TestEnv
'csv' => '3dd0896',
'v1.1.0' => 'b83d6e3',
'add-ipython-files' => '93ee732',
'add-pdf-file' => 'e774ebd'
'add-pdf-file' => 'e774ebd',
'add-pdf-text-binary' => '79faa7b'
}.freeze
# gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment