Commit 493bef03 authored by Yorick Peterse

Add API for generating Markdown changelogs

This adds an API that allows users to generate Markdown changelogs,
using Git commit messages and Git trailers as the input. The API is
hidden behind a feature flag called "changelog_api", and is disabled by
default.

For more information, see merge request
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52116 and epic
https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/351.
parent bbf4dfea
# frozen_string_literal: true
module MergeRequests
  # Looks up, for a batch of commits, the oldest merge request associated with
  # each commit and returns the results keyed by commit SHA.
  #
  # Resolving a whole batch at once avoids running a separate query for every
  # single commit.
  class OldestPerCommitFinder
    # project - the project whose ID is handed to the underlying lookup.
    def initialize(project)
      @project = project
    end

    # Builds a Hash that maps the ID (SHA) of each given commit to the oldest
    # merge request that introduced it.
    #
    # commits - a collection of objects responding to `id`.
    #
    # Commits for which no merge request could be loaded are left out of the
    # resulting Hash.
    def execute(commits)
      shas = commits.map(&:id)
      rows = MergeRequestDiffCommit
        .oldest_merge_request_id_per_commit(@project.id, shas)

      merge_request_ids = rows.map { |row| row[:merge_request_id] }
      merge_requests_by_id = MergeRequest
        .preload_target_project
        .id_in(merge_request_ids)
        .index_by(&:id)

      rows.each_with_object({}) do |row, result|
        merge_request = merge_requests_by_id[row[:merge_request_id]]
        next unless merge_request

        result[row[:sha]] = merge_request
      end
    end
  end
end
......@@ -35,4 +35,23 @@ class MergeRequestDiffCommit < ApplicationRecord
Gitlab::Database.bulk_insert(self.table_name, rows) # rubocop:disable Gitlab/BulkInsert
end
# Builds a query returning, per commit SHA in `shas`, the lowest ID among the
# merged merge requests targeting `project_id` whose latest diff contains
# that commit. Every row exposes `sha` and `merge_request_id`.
def self.oldest_merge_request_id_per_commit(project_id, shas)
  # This method is defined here and not on MergeRequest, otherwise the SHA
  # values used in the WHERE below won't be encoded correctly.
  columns = [
    'merge_request_diff_commits.sha AS sha',
    'min(merge_requests.id) AS merge_request_id'
  ]

  # Only the latest diff of every merge request is joined in.
  latest_diff_join =
    'INNER JOIN merge_requests ' \
    'ON merge_requests.latest_merge_request_diff_id = merge_request_diffs.id'

  merged_in_project = {
    target_project_id: project_id,
    state_id: MergeRequest.available_states[:merged]
  }

  select(columns)
    .joins(:merge_request_diff)
    .joins(latest_diff_join)
    .where(sha: shas)
    .where(merge_requests: merged_in_project)
    .group(:sha)
end
end
# frozen_string_literal: true
module Repositories
  # Service that builds the changelog section for a release and commits it to
  # a file in the project's repository.
  class ChangelogService
    DEFAULT_TRAILER = 'Changelog'
    DEFAULT_FILE = 'CHANGELOG.md'

    # project - The `Project` to generate the changelog section for.
    #
    # user - The `User` used for committing the changes to the Git repository.
    #
    # version - The version of the release, as a `String` using semantic
    #           versioning as the format.
    #
    # from, to - The Git refs or SHAs delimiting the commits to include in
    #            the changelog. The ref/SHA given as `from` isn't included in
    #            the list itself.
    #
    # date - The date of the release. Defaults to the current date/time.
    #
    # branch - The branch to commit the changes to. The branch must already
    #          exist.
    #
    # trailer - The Git trailer used for determining what commits to include
    #           in the changelog.
    #
    # file - The name/path of the file to commit the changes to. If the file
    #        doesn't exist, it's created automatically.
    #
    # message - The commit message to use when committing the changelog
    #           changes.
    #
    # rubocop: disable Metrics/ParameterLists
    def initialize(
      project,
      user,
      version:,
      from:,
      to:,
      date: DateTime.now,
      branch: project.default_branch_or_master,
      trailer: DEFAULT_TRAILER,
      file: DEFAULT_FILE,
      message: "Add changelog for version #{version}"
    )
      # Who and where.
      @project = project
      @user = user

      # What goes into the release.
      @version = version
      @date = date
      @from = from
      @to = to
      @trailer = trailer

      # How the result is committed.
      @branch = branch
      @file = file
      @message = message
    end
    # rubocop: enable Metrics/ParameterLists

    # Collects a changelog entry for every commit in the range that carries
    # the configured trailer, then commits the resulting release section.
    def execute
      # For every entry we want to only include the merge request that
      # originally introduced the commit, which is the oldest merge request
      # that contains the commit. We fetch these merge requests in batches,
      # reducing the number of SQL queries needed to get this data.
      merge_requests_finder = MergeRequests::OldestPerCommitFinder.new(@project)
      release = Gitlab::Changelog::Release.new(
        version: @version,
        date: @date,
        config: Gitlab::Changelog::Config.from_git(@project)
      )
      commits_finder =
        CommitsWithTrailerFinder.new(project: @project, from: @from, to: @to)

      commits_finder.each_page(@trailer) do |commits|
        merge_requests = merge_requests_finder.execute(commits)

        # Preload the authors. This ensures we only need a single SQL query
        # per batch of commits, instead of needing a query for every commit.
        commits.each(&:lazy_author)

        commits.each { |commit| add_entry(release, commit, merge_requests) }
      end

      Gitlab::Changelog::Committer
        .new(@project, @user)
        .commit(release: release, file: @file, branch: @branch, message: @message)
    end

    private

    # Adds a single entry for `commit` to `release`, using the value of the
    # configured trailer as the category and looking up the merge request in
    # the given Hash (keyed by commit ID).
    def add_entry(release, commit, merge_requests)
      release.add_entry(
        title: commit.title,
        commit: commit,
        category: commit.trailers.fetch(@trailer),
        author: commit.author,
        merge_request: merge_requests[commit.id]
      )
    end
  end
end
---
title: Add API for generating Markdown changelogs
merge_request: 52116
author:
type: added
---
name: changelog_api
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52116
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300043
milestone: '13.9'
type: development
group: group::source code
default_enabled: false
# frozen_string_literal: true
# Adds (and on rollback removes) the index used to efficiently obtain the
# oldest merge request for a commit SHA.
class AddOldestMergeRequestsIndex < ActiveRecord::Migration[6.0]
  include Gitlab::Database::SchemaHelpers
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  # This migration is flagged as not requiring downtime.
  DOWNTIME = false

  INDEX = 'index_on_merge_requests_for_latest_diffs'

  # Creates the index and attaches a descriptive comment to it. Nothing is
  # done when an index with this name already exists.
  def up
    return if index_present?

    execute(
      "CREATE INDEX CONCURRENTLY #{INDEX} ON merge_requests " \
      'USING btree (target_project_id) INCLUDE (id, latest_merge_request_diff_id)'
    )

    create_comment(
      'INDEX',
      INDEX,
      'Index used to efficiently obtain the oldest merge request for a commit SHA'
    )
  end

  # Drops the index, provided it exists.
  def down
    return unless index_present?

    execute("DROP INDEX CONCURRENTLY #{INDEX}")
  end

  private

  # Whether `merge_requests` already has an index named INDEX.
  def index_present?
    index_exists_by_name?('merge_requests', INDEX)
  end
end
c173ba86340efe39977f1b319d1ebcead634e3bfe819a30e230fb4f81766f28a
\ No newline at end of file
......@@ -22433,6 +22433,10 @@ CREATE UNIQUE INDEX index_on_instance_statistics_recorded_at_and_identifier ON a
CREATE INDEX index_on_label_links_all_columns ON label_links USING btree (target_id, label_id, target_type);
CREATE INDEX index_on_merge_requests_for_latest_diffs ON merge_requests USING btree (target_project_id) INCLUDE (id, latest_merge_request_diff_id);
COMMENT ON INDEX index_on_merge_requests_for_latest_diffs IS 'Index used to efficiently obtain the oldest merge request for a commit SHA';
CREATE INDEX index_on_namespaces_lower_name ON namespaces USING btree (lower((name)::text));
CREATE INDEX index_on_namespaces_lower_path ON namespaces USING btree (lower((path)::text));
......
This diff is collapsed.
......@@ -170,6 +170,67 @@ module API
not_found!("Merge Base")
end
end
# POST :id/repository/changelog
#
# Generates a changelog section for a release and commits it, provided the
# `changelog_api` feature flag is enabled for the project and the current user
# may push to the target branch.
desc 'Generates a changelog section for a release' do
  detail 'This feature was introduced in GitLab 13.9'
end
params do
  requires :version, type: String, regexp: Gitlab::Regex.unbounded_semver_regex,
    desc: 'The version of the release, using the semantic versioning format'

  requires :from, type: String,
    desc: 'The first commit in the range of commits to use for the changelog'

  requires :to, type: String,
    desc: 'The last commit in the range of commits to use for the changelog'

  optional :date, type: DateTime,
    desc: 'The date and time of the release'

  optional :branch, type: String,
    desc: 'The branch to commit the changelog changes to'

  optional :trailer, type: String,
    desc: 'The Git trailer to use for determining if commits are to be included in the changelog',
    default: ::Repositories::ChangelogService::DEFAULT_TRAILER

  optional :file, type: String,
    desc: 'The file to commit the changelog changes to',
    default: ::Repositories::ChangelogService::DEFAULT_FILE

  optional :message, type: String,
    desc: 'The commit message to use when committing the changelog'
end
post ':id/repository/changelog' do
  not_found! unless Feature.enabled?(:changelog_api, user_project)

  # Without an explicit branch, the changelog goes to the project's default
  # branch.
  target_branch = params[:branch] || user_project.default_branch_or_master
  user_access = Gitlab::UserAccess.new(current_user, container: user_project)

  forbidden!("You are not allowed to commit a changelog on this branch") unless user_access.can_push_to_branch?(target_branch)

  # Parameters the client left out are not forwarded to the service.
  ::Repositories::ChangelogService
    .new(user_project, current_user, **declared_params(include_missing: false))
    .execute

  status(200)
rescue => error
  # Any error raised above is reported to the client as a 500.
  render_api_error!("Failed to generate the changelog: #{error.message}", 500)
end
end
end
end
......@@ -26,7 +26,13 @@ module Gitlab
# scratch, otherwise we may end up throwing away changes. As such, all
# the logic is contained within the retry block.
Retriable.retriable(on: CommitError) do
commit = @project.commit(branch)
commit = Gitlab::Git::Commit.last_for_path(
@project.repository,
branch,
file,
literal_pathspec: true
)
content = blob_content(file, commit)
# If the release has already been added (e.g. concurrently by another
......
......@@ -37,7 +37,10 @@ module Gitlab
end
if (template = hash['template'])
config.template = Template::Compiler.new.compile(template)
# We use the full namespace here (and further down) as otherwise Rails
# may use the wrong constant when autoloading is used.
config.template =
::Gitlab::Changelog::Template::Compiler.new.compile(template)
end
if (categories = hash['categories'])
......@@ -54,7 +57,8 @@ module Gitlab
# Sets up the configuration for `project` with the default date format, the
# compiled default template, and no categories.
def initialize(project)
  @project = project
  @date_format = DEFAULT_DATE_FORMAT
  # We use the full namespace here as otherwise Rails may use the wrong
  # constant when autoloading is used. The template is compiled exactly once.
  @template =
    ::Gitlab::Changelog::Template::Compiler.new.compile(DEFAULT_TEMPLATE)
  @categories = {}
end
......
......@@ -98,19 +98,27 @@ module Gitlab
ESCAPED_NEWLINE = /\\\n$/.freeze
# The start tag for ERB tags. These tags will be escaped, preventing
# users from using ERB directly.
#
# Besides a plain `<%`, the pattern also matches a `<` and a `%` that are
# separated by optional backslashes and whitespace, so tags introduced
# through clever use of whitespace are escaped as well.
ERB_START_TAG = /<\\?\s*\\?\s*%/.freeze
# Compiles the user-supplied `template` String into a
# `::Gitlab::Changelog::Template::Template`.
#
# ERB start tags are escaped first, then every line is passed through
# `transform`; the transformed lines are joined behind a prelude that
# aliases `variables` as `it`.
def compile(template)
  transformed_lines = ['<% it = variables %>']

  # ERB tags must be stripped here, otherwise a user may introduce ERB
  # tags by making clever use of whitespace. See
  # https://gitlab.com/gitlab-org/gitlab/-/issues/300224 for more
  # information.
  template = template.gsub(ERB_START_TAG, '<%%')

  template.each_line { |line| transformed_lines << transform(line) }

  # We use the full namespace here as otherwise Rails may use the wrong
  # constant when autoloading is used.
  ::Gitlab::Changelog::Template::Template.new(transformed_lines.join)
end
def transform(line)
line.gsub!(ESCAPED_NEWLINE, '')
line.gsub!(ERB_START_TAG, '<%%')
# This replacement ensures that "end" blocks on their own lines
# don't add extra newlines. Using an ERB -%> tag sadly swallows too
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe MergeRequests::OldestPerCommitFinder do
  describe '#execute' do
    it 'returns a Hash mapping commit SHAs to their oldest merge requests' do
      project = create(:project)
      shared_sha = Digest::SHA1.hexdigest('foo')
      newer_only_sha = Digest::SHA1.hexdigest('bar')

      older_mr = create(:merge_request, :merged, target_project: project)
      newer_mr = create(:merge_request, :merged, target_project: project)
      older_diff = create(:merge_request_diff, merge_request: older_mr)
      newer_diff = create(:merge_request_diff, merge_request: newer_mr)

      # The shared commit is part of both merge requests; only the older one
      # is expected back for it.
      [older_diff, newer_diff].each do |diff|
        create(:merge_request_diff_commit, merge_request_diff: diff, sha: shared_sha)
      end

      create(
        :merge_request_diff_commit,
        merge_request_diff: newer_diff,
        sha: newer_only_sha,
        relative_order: 1
      )

      commits = [shared_sha, newer_only_sha].map { |sha| double(:commit, id: sha) }
      result = described_class.new(project).execute(commits)

      expect(result).to eq(shared_sha => older_mr, newer_only_sha => newer_mr)
    end

    it 'skips merge requests that are not merged' do
      sha = Digest::SHA1.hexdigest('foo')
      unmerged_mr = create(:merge_request)
      diff = create(:merge_request_diff, merge_request: unmerged_mr)

      create(:merge_request_diff_commit, merge_request_diff: diff, sha: sha)

      finder = described_class.new(unmerged_mr.target_project)

      expect(finder.execute([double(:commit, id: sha)])).to be_empty
    end
  end
end
......@@ -86,5 +86,43 @@ RSpec.describe Gitlab::Changelog::Committer do
end.not_to raise_error
end
end
context "when the changelog changes before saving the changes" do
  it 'raises a CommitError' do
    release_date = Time.utc(2020, 1, 1)
    first_release = Gitlab::Changelog::Release
      .new(version: '1.0.0', date: release_date, config: config)
    second_release = Gitlab::Changelog::Release
      .new(version: '2.0.0', date: release_date, config: config)

    # Seed the changelog with an initial commit; it is what the second
    # commit attempt below is compared against.
    committer.commit(
      release: first_release,
      file: 'CHANGELOG.md',
      branch: 'master',
      message: 'Initial commit'
    )

    # Report a different last commit for the changelog file, as if it had
    # been changed in the meantime.
    stale_commit = double(:commit, sha: 'foo')

    allow(Gitlab::Git::Commit)
      .to receive(:last_for_path)
      .with(project.repository, 'master', 'CHANGELOG.md', literal_pathspec: true)
      .and_return(stale_commit)

    second_attempt = lambda do
      committer.commit(
        release: second_release,
        file: 'CHANGELOG.md',
        branch: 'master',
        message: 'Test commit'
      )
    end

    expect(&second_attempt).to raise_error(described_class::CommitError)
  end
end
end
end
......@@ -125,5 +125,12 @@ RSpec.describe Gitlab::Changelog::Template::Compiler do
expect(compile(input)).to eq(input)
end
it 'ignores malicious code that makes use of whitespace' do
  # A backslash and a newline sit between `<` and `%` in this template.
  template = "x<\\\n%::Kernel.system(\"id\")%>"
  rendered = 'x<%::Kernel.system("id")%>'

  expect(Kernel).not_to receive(:system).with('id')
  expect(compile(template)).to eq(rendered)
end
end
end
......@@ -610,4 +610,102 @@ RSpec.describe API::Repositories do
end
end
end
describe 'POST /projects/:id/repository/changelog' do
  context 'when the changelog_api feature flag is enabled' do
    # Request parameters shared by the examples below.
    let(:changelog_params) do
      {
        version: '1.0.0',
        from: 'foo',
        to: 'bar',
        date: '2020-01-01',
        branch: 'kittens',
        trailer: 'Foo',
        file: 'FOO.md',
        message: 'Commit message'
      }
    end

    let(:changelog_service) { instance_spy(Repositories::ChangelogService) }

    before do
      # The service is expected to receive the same values as the request,
      # except for the date, which arrives as a DateTime.
      allow(Repositories::ChangelogService)
        .to receive(:new)
        .with(
          project,
          user,
          **changelog_params.merge(date: DateTime.new(2020, 1, 1))
        )
        .and_return(changelog_service)
    end

    it 'generates the changelog for a version' do
      allow(changelog_service).to receive(:execute)

      post(
        api("/projects/#{project.id}/repository/changelog", user),
        params: changelog_params
      )

      expect(response).to have_gitlab_http_status(:ok)
    end

    it 'produces an error when generating the changelog fails' do
      failure = Gitlab::Changelog::Committer::CommitError.new('oops')

      allow(changelog_service).to receive(:execute).and_raise(failure)

      post(
        api("/projects/#{project.id}/repository/changelog", user),
        params: changelog_params
      )

      expect(response).to have_gitlab_http_status(:internal_server_error)
      expect(json_response['message']).to eq('Failed to generate the changelog: oops')
    end
  end

  context 'when the changelog_api feature flag is disabled' do
    before do
      stub_feature_flags(changelog_api: false)
    end

    it 'responds with a 404 Not Found' do
      minimal_params = { version: '1.0.0', from: 'foo', to: 'bar' }

      post(
        api("/projects/#{project.id}/repository/changelog", user),
        params: minimal_params
      )

      expect(response).to have_gitlab_http_status(:not_found)
    end
  end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Repositories::ChangelogService do
  describe '#execute' do
    it 'generates and commits a changelog section' do
      project = create(:project, :empty_repo)
      creator = project.creator
      first_author = create(:user)
      second_author = create(:user)

      [first_author, second_author].each { |author| project.add_maintainer(author) }

      first_mr = create(:merge_request, :merged, target_project: project)
      second_mr = create(:merge_request, :merged, target_project: project)

      # The start of a commit range is excluded while its end is included, so
      # an extra leading commit is needed for both changelog commits below to
      # be part of the range.
      #
      # In the real world, the start commit of the range will be the last
      # commit of the previous release, so ignoring that is expected and
      # desired.
      initial_sha = create_commit(
        project,
        creator,
        commit_message: 'Initial commit',
        actions: [{ action: 'create', content: 'test', file_path: 'README.md' }]
      )

      first_sha = create_commit(
        project,
        first_author,
        commit_message: "Title 1\n\nChangelog: feature",
        actions: [{ action: 'create', content: 'foo', file_path: 'a.txt' }]
      )

      second_sha = create_commit(
        project,
        second_author,
        commit_message: "Title 2\n\nChangelog: feature",
        actions: [{ action: 'create', content: 'bar', file_path: 'b.txt' }]
      )

      first_commit = project.commit(first_sha)
      second_commit = project.commit(second_sha)

      # The lookup is stubbed; it is expected to be called with the newest
      # commit ID first.
      allow(MergeRequestDiffCommit)
        .to receive(:oldest_merge_request_id_per_commit)
        .with(project.id, [second_commit.id, first_commit.id])
        .and_return([
          { sha: first_sha, merge_request_id: first_mr.id },
          { sha: second_sha, merge_request_id: second_mr.id }
        ])

      service = described_class
        .new(project, creator, version: '1.0.0', from: initial_sha, to: second_sha)

      recorder = ActiveRecord::QueryRecorder.new { service.execute }

      changelog = project.repository.blob_at('master', 'CHANGELOG.md')&.data

      expect(recorder.count).to eq(10)
      expect(changelog).to include('Title 1', 'Title 2')
    end
  end

  # Commits `params` to master on behalf of `user` and returns the `:result`
  # entry of the service response.
  def create_commit(project, user, params)
    commit_params = { start_branch: 'master', branch_name: 'master' }.merge(params)

    Files::MultiService.new(project, user, commit_params).execute.fetch(:result)
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment