Commit 124fa93a authored by Yorick Peterse's avatar Yorick Peterse

Merge branch 'remove-mr-diff-serialised-columns' into 'master'

Remove serialised diff and commit columns

Closes #39533

See merge request gitlab-org/gitlab-ce!15582
parents 552c9089 484ae2ee
......@@ -27,7 +27,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
@merge_request.merge_request_diff
end
@merge_request_diffs = @merge_request.merge_request_diffs.viewable.select_without_diff.order_id_desc
@merge_request_diffs = @merge_request.merge_request_diffs.viewable.order_id_desc
@comparable_diffs = @merge_request_diffs.select { |diff| diff.id < @merge_request_diff.id }
if params[:start_sha].present?
......
......@@ -283,9 +283,9 @@ class MergeRequest < ActiveRecord::Base
if persisted?
merge_request_diff.commit_shas
elsif compare_commits
compare_commits.reverse.map(&:sha)
compare_commits.to_a.reverse.map(&:sha)
else
[]
Array(diff_head_sha)
end
end
......@@ -494,7 +494,7 @@ class MergeRequest < ActiveRecord::Base
def merge_request_diff_for(diff_refs_or_sha)
@merge_request_diffs_by_diff_refs_or_sha ||= Hash.new do |h, diff_refs_or_sha|
diffs = merge_request_diffs.viewable.select_without_diff
diffs = merge_request_diffs.viewable
h[diff_refs_or_sha] =
if diff_refs_or_sha.is_a?(Gitlab::Diff::DiffRefs)
diffs.find_by_diff_refs(diff_refs_or_sha)
......@@ -918,28 +918,18 @@ class MergeRequest < ActiveRecord::Base
# Note that this could also return SHA from now dangling commits
#
def all_commit_shas
if persisted?
# MySQL doesn't support LIMIT in a subquery.
diffs_relation =
if Gitlab::Database.postgresql?
merge_request_diffs.order(id: :desc).limit(100)
else
merge_request_diffs
end
return commit_shas unless persisted?
column_shas = MergeRequestDiffCommit
.where(merge_request_diff: diffs_relation)
.limit(10_000)
.pluck('sha')
diffs_relation = merge_request_diffs
serialised_shas = merge_request_diffs.where.not(st_commits: nil).flat_map(&:commit_shas)
# MySQL doesn't support LIMIT in a subquery.
diffs_relation = diffs_relation.recent if Gitlab::Database.postgresql?
(column_shas + serialised_shas).uniq
elsif compare_commits
compare_commits.to_a.reverse.map(&:id)
else
[diff_head_sha]
end
MergeRequestDiffCommit
.where(merge_request_diff: diffs_relation)
.limit(10_000)
.pluck('sha')
.uniq
end
def merge_commit
......
class MergeRequestDiff < ActiveRecord::Base
include Sortable
include Importable
include Gitlab::EncodingHelper
include ManualInverseAssociation
include IgnorableColumn
# Prevent store of diff if commits amount more then 500
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
# Valid types of serialized diffs allowed by Gitlab::Git::Diff
VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze
ignore_column :st_commits,
:st_diffs
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
......@@ -16,9 +16,6 @@ class MergeRequestDiff < ActiveRecord::Base
has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
serialize :st_commits # rubocop:disable Cop/ActiveRecordSerialize
serialize :st_diffs # rubocop:disable Cop/ActiveRecordSerialize
state_machine :state, initial: :empty do
state :collected
state :overflow
......@@ -32,6 +29,8 @@ class MergeRequestDiff < ActiveRecord::Base
scope :viewable, -> { without_state(:empty) }
scope :recent, -> { order(id: :desc).limit(100) }
# All diff information is collected from repository after object is created.
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
......@@ -40,14 +39,6 @@ class MergeRequestDiff < ActiveRecord::Base
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
end
def self.select_without_diff
select(column_names - ['st_diffs'])
end
def st_commits
super || []
end
# Collect information about commits and diff from repository
# and save it to the database as serialized data
def save_git_content
......@@ -129,11 +120,7 @@ class MergeRequestDiff < ActiveRecord::Base
end
def commit_shas
if st_commits.present?
st_commits.map { |commit| commit[:id] }
else
merge_request_diff_commits.map(&:sha)
end
merge_request_diff_commits.map(&:sha)
end
def diff_refs=(new_diff_refs)
......@@ -208,34 +195,11 @@ class MergeRequestDiff < ActiveRecord::Base
end
def commits_count
if st_commits.present?
st_commits.size
else
merge_request_diff_commits.size
end
end
def utf8_st_diffs
return [] if st_diffs.blank?
st_diffs.map do |diff|
diff.each do |k, v|
diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
end
end
merge_request_diff_commits.size
end
private
# Old GitLab implementations may have generated diffs as ["--broken-diff"].
# Avoid an error 500 by ignoring bad elements. See:
# https://gitlab.com/gitlab-org/gitlab-ce/issues/20776
def valid_raw_diff?(raw)
return false unless raw.respond_to?(:each)
raw.any? { |element| VALID_CLASSES.include?(element.class) }
end
def create_merge_request_diff_files(diffs)
rows = diffs.map.with_index do |diff, index|
diff_hash = diff.to_hash.merge(
......@@ -259,9 +223,7 @@ class MergeRequestDiff < ActiveRecord::Base
end
def load_diffs(options)
return Gitlab::Git::DiffCollection.new([]) unless diffs_from_database
raw = diffs_from_database
raw = merge_request_diff_files.map(&:to_hash)
if paths = options[:paths]
raw = raw.select do |diff|
......@@ -272,22 +234,8 @@ class MergeRequestDiff < ActiveRecord::Base
Gitlab::Git::DiffCollection.new(raw, options)
end
def diffs_from_database
return @diffs_from_database if defined?(@diffs_from_database)
@diffs_from_database =
if st_diffs.present?
if valid_raw_diff?(st_diffs)
st_diffs
end
elsif merge_request_diff_files.present?
merge_request_diff_files.map(&:to_hash)
end
end
def load_commits
commits = st_commits.presence || merge_request_diff_commits
commits = commits.map { |commit| Commit.from_hash(commit.to_hash, project) }
commits = merge_request_diff_commits.map { |commit| Commit.from_hash(commit.to_hash, project) }
CommitCollection
.new(merge_request.source_project, commits, merge_request.source_branch)
......
class CleanUpFromMergeRequestDiffsAndCommits < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
include ::EachBatch
end
disable_ddl_transaction!
def up
Gitlab::BackgroundMigration.steal('DeserializeMergeRequestDiffsAndCommits')
# The literal '--- []\n' value is created by the import process and treated
# as null by the application, so we can ignore those - even if we were
# migrating, it wouldn't create any rows.
literal_prefix = Gitlab::Database.postgresql? ? 'E' : ''
non_empty = "
(st_commits IS NOT NULL AND st_commits != #{literal_prefix}'--- []\n')
OR
(st_diffs IS NOT NULL AND st_diffs != #{literal_prefix}'--- []\n')
".squish
MergeRequestDiff.where(non_empty).each_batch(of: 500) do |relation, index|
range = relation.pluck('MIN(id)', 'MAX(id)').first
Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits.new.perform(*range)
end
end
def down
end
end
......@@ -3,8 +3,19 @@ class LimitsToMysql < ActiveRecord::Migration
def up
return unless ActiveRecord::Base.configurations[Rails.env]['adapter'] =~ /^mysql/
change_column :merge_request_diffs, :st_commits, :text, limit: 2147483647
change_column :merge_request_diffs, :st_diffs, :text, limit: 2147483647
# These columns were removed in 10.3, but this is called from two places:
# 1. A migration run after they were added, but before they were removed.
# 2. A rake task which can be run at any time.
#
# Because of item 2, we need these checks.
if column_exists?(:merge_request_diffs, :st_commits)
change_column :merge_request_diffs, :st_commits, :text, limit: 2147483647
end
if column_exists?(:merge_request_diffs, :st_diffs)
change_column :merge_request_diffs, :st_diffs, :text, limit: 2147483647
end
change_column :snippets, :content, :text, limit: 2147483647
change_column :notes, :st_diff, :text, limit: 2147483647
end
......
class RemoveMergeRequestDiffStCommitsAndStDiffs < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
remove_column :merge_request_diffs, :st_commits, :text
remove_column :merge_request_diffs, :st_diffs, :text
end
end
class AddIndexOnMergeRequestDiffsMergeRequestIdAndId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index(:merge_request_diffs, [:merge_request_id, :id])
end
def down
if index_exists?(:merge_request_diffs, [:merge_request_id, :id])
remove_concurrent_index(:merge_request_diffs, [:merge_request_id, :id])
end
end
end
class RemoveIndexOnMergeRequestDiffsMergeRequestDiffId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
if index_exists?(:merge_request_diffs, :merge_request_id)
remove_concurrent_index(:merge_request_diffs, :merge_request_id)
end
end
def down
add_concurrent_index(:merge_request_diffs, :merge_request_id)
end
end
......@@ -1014,8 +1014,6 @@ ActiveRecord::Schema.define(version: 20171124150326) do
create_table "merge_request_diffs", force: :cascade do |t|
t.string "state"
t.text "st_commits"
t.text "st_diffs"
t.integer "merge_request_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
......@@ -1025,7 +1023,7 @@ ActiveRecord::Schema.define(version: 20171124150326) do
t.string "start_commit_sha"
end
add_index "merge_request_diffs", ["merge_request_id"], name: "index_merge_request_diffs_on_merge_request_id", using: :btree
add_index "merge_request_diffs", ["merge_request_id", "id"], name: "index_merge_request_diffs_on_merge_request_id_and_id", using: :btree
create_table "merge_request_metrics", force: :cascade do |t|
t.integer "merge_request_id", null: false
......
......@@ -163,7 +163,7 @@ Starting a project from a template needs this project to be exported. On a
up to date master branch with run:
```
gdk run db
gdk run
# In a new terminal window
bundle exec rake gitlab:update_project_templates
git checkout -b update-project-templates
......
......@@ -731,8 +731,6 @@ X-Gitlab-Event: Merge Request Hook
"title": "MS-Viewport",
"created_at": "2013-12-03T17:23:34Z",
"updated_at": "2013-12-03T17:23:34Z",
"st_commits": null,
"st_diffs": null,
"milestone_id": null,
"state": "opened",
"merge_status": "unchecked",
......
......@@ -30,7 +30,8 @@ with all their related data and be moved into a new GitLab instance.
| GitLab version | Import/Export version |
| ---------------- | --------------------- |
| 10.0 to current | 0.2.0 |
| 10.3 to current | 0.2.1 |
| 10.0 | 0.2.0 |
| 9.4.0 | 0.1.8 |
| 9.2.0 | 0.1.7 |
| 8.17.0 | 0.1.6 |
......
......@@ -3,7 +3,6 @@ module Gitlab
class PlanEventFetcher < BaseEventFetcher
def initialize(*args)
@projections = [mr_diff_table[:id],
mr_diff_table[:st_commits],
issue_metrics_table[:first_mentioned_in_commit_at]]
super(*args)
......@@ -37,12 +36,7 @@ module Gitlab
def first_time_reference_commit(event)
return nil unless event && merge_request_diff_commits
commits =
if event['st_commits'].present?
YAML.load(event['st_commits'])
else
merge_request_diff_commits[event['id'].to_i]
end
commits = merge_request_diff_commits[event['id'].to_i]
return nil if commits.blank?
......
......@@ -3,7 +3,7 @@ module Gitlab
extend self
# For every version update, the version history in import_export.md has to be kept up to date.
VERSION = '0.2.0'.freeze
VERSION = '0.2.1'.freeze
FILENAME_LIMIT = 50
def export_path(relative_path:)
......
......@@ -133,8 +133,6 @@ methods:
- :type
services:
- :type
merge_request_diff:
- :utf8_st_diffs
merge_request_diff_files:
- :utf8_diff
merge_requests:
......
......@@ -58,7 +58,6 @@ module Gitlab
def setup_models
case @relation_name
when :merge_request_diff then setup_st_diff_commits
when :merge_request_diff_files then setup_diff
when :notes then setup_note
when :project_label, :project_labels then setup_label
......@@ -208,13 +207,6 @@ module Gitlab
relation_class: relation_class)
end
def setup_st_diff_commits
@relation_hash['st_diffs'] = @relation_hash.delete('utf8_st_diffs')
HashUtil.deep_symbolize_array!(@relation_hash['st_diffs'])
HashUtil.deep_symbolize_array_with_date!(@relation_hash['st_commits'])
end
def setup_diff
@relation_hash['diff'] = @relation_hash.delete('utf8_diff')
end
......
require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate, :migration, schema: 20171114162227 do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
describe '#perform' do
let(:merge_request) { create(:merge_request) }
let(:merge_request_diff) { merge_request.merge_request_diff }
let(:project) { create(:project, :repository) }
let(:merge_request) { merge_requests.create!(iid: 1, target_project_id: project.id, source_project_id: project.id, target_branch: 'feature', source_branch: 'master').becomes(MergeRequest) }
let(:merge_request_diff) { MergeRequest.find(merge_request.id).create_merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
......@@ -68,7 +72,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
let(:stop_id) { described_class::MergeRequestDiff.maximum(:id) }
before do
merge_request.reload_diff(true)
merge_request.create_merge_request_diff
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
......@@ -288,7 +292,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
context 'when the merge request diffs are Rugged::Patch instances' do
let(:commits) { merge_request_diff.commits.map(&:to_hash) }
let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
let(:first_commit) { project.repository.commit(merge_request_diff.head_commit_sha) }
let(:expected_commits) { commits }
let(:diffs) { first_commit.rugged_diff_from_parent.patches }
let(:expected_diffs) { [] }
......@@ -298,7 +302,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
context 'when the merge request diffs are Rugged::Diff::Delta instances' do
let(:commits) { merge_request_diff.commits.map(&:to_hash) }
let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
let(:first_commit) { project.repository.commit(merge_request_diff.head_commit_sha) }
let(:expected_commits) { commits }
let(:diffs) { first_commit.rugged_diff_from_parent.deltas }
let(:expected_diffs) { [] }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -95,26 +95,12 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
end
end
it 'has the correct data for merge request st_diffs' do
# makes sure we are renaming the custom method +utf8_st_diffs+ into +st_diffs+
# one MergeRequestDiff uses the new format, where st_diffs is expected to be nil
expect(MergeRequestDiff.where.not(st_diffs: nil).count).to eq(8)
end
it 'has the correct data for merge request diff files' do
expect(MergeRequestDiffFile.where.not(diff: nil).count).to eq(9)
expect(MergeRequestDiffFile.where.not(diff: nil).count).to eq(55)
end
it 'has the correct data for merge request diff commits in serialised and table formats' do
expect(MergeRequestDiff.where.not(st_commits: nil).count).to eq(7)
expect(MergeRequestDiffCommit.count).to eq(6)
end
it 'has the correct time for merge request st_commits' do
st_commits = MergeRequestDiff.where.not(st_commits: nil).first.st_commits
expect(st_commits.first[:committed_date]).to be_kind_of(Time)
it 'has the correct data for merge request diff commits' do
expect(MergeRequestDiffCommit.count).to eq(77)
end
it 'has the correct data for merge request latest_merge_request_diff' do
......
......@@ -93,10 +93,6 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['merge_requests'].first['merge_request_diff']).not_to be_empty
end
it 'has merge requests diff st_diffs' do
expect(saved_project_json['merge_requests'].first['merge_request_diff']['utf8_st_diffs']).not_to be_nil
end
it 'has merge request diff files' do
expect(saved_project_json['merge_requests'].first['merge_request_diff']['merge_request_diff_files']).not_to be_empty
end
......@@ -172,12 +168,6 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['custom_attributes'].count).to eq(2)
end
it 'does not complain about non UTF-8 characters in MR diffs' do
ActiveRecord::Base.connection.execute("UPDATE merge_request_diffs SET st_diffs = '---\n- :diff: !binary |-\n LS0tIC9kZXYvbnVsbAorKysgYi9pbWFnZXMvbnVjb3IucGRmCkBAIC0wLDAg\n KzEsMTY3OSBAQAorJVBERi0xLjUNJeLjz9MNCisxIDAgb2JqDTw8L01ldGFk\n YXR'")
expect(project_tree_saver.save).to be true
end
it 'does not complain about non UTF-8 characters in MR diff files' do
ActiveRecord::Base.connection.execute("UPDATE merge_request_diff_files SET diff = '---\n- :diff: !binary |-\n LS0tIC9kZXYvbnVsbAorKysgYi9pbWFnZXMvbnVjb3IucGRmCkBAIC0wLDAg\n KzEsMTY3OSBAQAorJVBERi0xLjUNJeLjz9MNCisxIDAgb2JqDTw8L01ldGFk\n YXR'")
......
......@@ -173,7 +173,6 @@ MergeRequest:
MergeRequestDiff:
- id
- state
- st_commits
- merge_request_id
- created_at
- updated_at
......
require 'spec_helper'
describe MergeRequestDiff do
let(:diff_with_commits) { create(:merge_request).merge_request_diff }
describe 'create new record' do
subject { create(:merge_request).merge_request_diff }
subject { diff_with_commits }
it { expect(subject).to be_valid }
it { expect(subject).to be_persisted }
......@@ -23,57 +25,41 @@ describe MergeRequestDiff do
end
describe '#diffs' do
let(:mr) { create(:merge_request, :with_diffs) }
let(:mr_diff) { mr.merge_request_diff }
context 'when the :ignore_whitespace_change option is set' do
it 'creates a new compare object instead of loading from the DB' do
expect(mr_diff).not_to receive(:load_diffs)
expect(mr_diff.compare).to receive(:diffs).and_call_original
expect(diff_with_commits).not_to receive(:load_diffs)
expect(diff_with_commits.compare).to receive(:diffs).and_call_original
mr_diff.raw_diffs(ignore_whitespace_change: true)
diff_with_commits.raw_diffs(ignore_whitespace_change: true)
end
end
context 'when the raw diffs are empty' do
before do
MergeRequestDiffFile.delete_all(merge_request_diff_id: mr_diff.id)
end
it 'returns an empty DiffCollection' do
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).to be_empty
end
end
context 'when the raw diffs have invalid content' do
before do
MergeRequestDiffFile.delete_all(merge_request_diff_id: mr_diff.id)
mr_diff.update_attributes(st_diffs: ["--broken-diff"])
MergeRequestDiffFile.delete_all(merge_request_diff_id: diff_with_commits.id)
end
it 'returns an empty DiffCollection' do
expect(mr_diff.raw_diffs.to_a).to be_empty
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).to be_empty
expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(diff_with_commits.raw_diffs).to be_empty
end
end
context 'when the raw diffs exist' do
it 'returns the diffs' do
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).not_to be_empty
expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(diff_with_commits.raw_diffs).not_to be_empty
end
context 'when the :paths option is set' do
let(:diffs) { mr_diff.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) }
let(:diffs) { diff_with_commits.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) }
it 'only returns diffs that match the (old path, new path) given' do
expect(diffs.map(&:new_path)).to contain_exactly('files/ruby/popen.rb')
end
it 'uses the diffs from the DB' do
expect(mr_diff).to receive(:load_diffs)
expect(diff_with_commits).to receive(:load_diffs)
diffs
end
......@@ -117,51 +103,29 @@ describe MergeRequestDiff do
end
describe '#commit_shas' do
it 'returns all commits SHA using serialized commits' do
subject.st_commits = [
{ id: 'sha1' },
{ id: 'sha2' }
]
expect(subject.commit_shas).to eq(%w(sha1 sha2))
it 'returns all commit SHAs using commits from the DB' do
expect(diff_with_commits.commit_shas).not_to be_empty
expect(diff_with_commits.commit_shas).to all(match(/\h{40}/))
end
end
describe '#compare_with' do
subject { create(:merge_request, source_branch: 'fix').merge_request_diff }
it 'delegates compare to the service' do
expect(CompareService).to receive(:new).and_call_original
subject.compare_with(nil)
diff_with_commits.compare_with(nil)
end
it 'uses git diff A..B approach by default' do
diffs = subject.compare_with('0b4bc9a49b562e85de7cc9e834518ea6828729b9').diffs
diffs = diff_with_commits.compare_with('0b4bc9a49b562e85de7cc9e834518ea6828729b9').diffs
expect(diffs.size).to eq(3)
expect(diffs.size).to eq(21)
end
end
describe '#commits_count' do
it 'returns number of commits using serialized commits' do
subject.st_commits = [
{ id: 'sha1' },
{ id: 'sha2' }
]
expect(subject.commits_count).to eq 2
end
end
describe '#utf8_st_diffs' do
it 'does not raise error when a hash value is in binary' do
subject.st_diffs = [
{ diff: "\0" },
{ diff: "\x05\x00\x68\x65\x6c\x6c\x6f" }
]
expect { subject.utf8_st_diffs }.not_to raise_error
expect(diff_with_commits.commits_count).to eq(29)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment