Commit 13157870 authored by Nick Thomas's avatar Nick Thomas

Fix denial-of-service attack in Markdown parser

Given a markdown link like:

```md
[foo](foo bar.jpg)
```

Commonmark parsers will not recognise it as a link, because of the
space. To solve this, we have a Banzai filter that uses a regular
expression to detect the unparsed link and rewrite it into a parseable
one, replacing the space with '%20'. However, the use of backtracking
in the regular expression makes it vulnerable to complex inputs.

Switching to Gitlab::UntrustedRegexp lets us guarantee that the regular
expression will always complete in linear time, solving the resource
use issue.

Changelog: security
parent a2eabd5a
......@@ -26,14 +26,17 @@ module Banzai
# Pattern to match a standard markdown link
#
# Rubular: http://rubular.com/r/2EXEQ49rg5
LINK_OR_IMAGE_PATTERN = %r{
(?<preview_operator>!)?
\[(?<text>.+?)\]
\(
(?<new_link>.+?)
(?<title>\ ".+?")?
\)
}x.freeze
#
# This pattern is vulnerable to malicious inputs, so use Gitlab::UntrustedRegexp
# to place bounds on execution time
LINK_OR_IMAGE_PATTERN = Gitlab::UntrustedRegexp.new(
'(?P<preview_operator>!)?' \
'\[(?P<text>.+?)\]' \
'\(' \
'(?P<new_link>.+?)' \
'(?P<title>\ ".+?")?' \
'\)'
)
# Text matching LINK_OR_IMAGE_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
......@@ -48,7 +51,7 @@ module Banzai
doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html
next unless content.match(LINK_OR_IMAGE_PATTERN)
next unless LINK_OR_IMAGE_PATTERN.match(content)
html = spaced_link_filter(content)
......
......@@ -2,18 +2,20 @@
module Gitlab
class StringRegexMarker < StringRangeMarker
# rubocop: disable CodeReuse/ActiveRecord
def mark(regex, group: 0, &block)
ranges = []
offset = 0
raw_line.scan(regex) do
begin_index, end_index = Regexp.last_match.offset(group)
while match = regex.match(raw_line[offset..])
begin_index = match.begin(group) + offset
end_index = match.end(group) + offset
ranges << (begin_index..(end_index - 1))
offset = end_index
end
super(ranges, &block)
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
......@@ -63,6 +63,16 @@ RSpec.describe Banzai::Filter::SpacedLinkFilter do
end
end
it 'does not process malicious input' do
Timeout.timeout(10) do
doc = filter('[ (](' * 60_000)
found_links = doc.css('a')
expect(found_links.size).to eq(0)
end
end
it 'converts multiple URLs' do
link1 = '[first](slug one)'
link2 = '[second](http://example.com/slug two)'
......
......@@ -23,9 +23,10 @@ RSpec.describe Gitlab::StringRegexMarker do
context 'with multiple occurrences' do
let(:raw) { %{a <b> <c> d} }
let(:rich) { %{a &lt;b&gt; &lt;c&gt; d}.html_safe }
let(:regexp) { /<[a-z]>/ }
subject do
described_class.new(raw, rich).mark(/<[a-z]>/) do |text, left:, right:, mode:|
described_class.new(raw, rich).mark(regexp) do |text, left:, right:, mode:|
%{<strong>#{text}</strong>}.html_safe
end
end
......@@ -34,6 +35,15 @@ RSpec.describe Gitlab::StringRegexMarker do
expect(subject).to eq(%{a <strong>&lt;b&gt;</strong> <strong>&lt;c&gt;</strong> d})
expect(subject).to be_html_safe
end
context 'with a Gitlab::UntrustedRegexp' do
let(:regexp) { Gitlab::UntrustedRegexp.new('<[a-z]>') }
it 'marks the matches' do
expect(subject).to eq(%{a <strong>&lt;b&gt;</strong> <strong>&lt;c&gt;</strong> d})
expect(subject).to be_html_safe
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment