Commit 1a09d5cd authored by Jarka Kadlecová's avatar Jarka Kadlecová

Render htmlentities correctly for links not supported by Rinku

parent 0ef19f1c
---
title: Render htmlentities correctly for links not supported by Rinku
merge_request:
author:
type: fixed
...@@ -26,7 +26,7 @@ module Banzai ...@@ -26,7 +26,7 @@ module Banzai
# in the generated link. # in the generated link.
# #
# Rubular: http://rubular.com/r/cxjPyZc7Sb # Rubular: http://rubular.com/r/cxjPyZc7Sb
LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)} LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
# Text matching LINK_PATTERN inside these elements will not be linked # Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set IGNORE_PARENTS = %w(a code kbd pre script style).to_set
...@@ -35,42 +35,16 @@ module Banzai ...@@ -35,42 +35,16 @@ module Banzai
TEXT_QUERY = %Q(descendant-or-self::text()[ TEXT_QUERY = %Q(descendant-or-self::text()[
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')}) not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
and contains(., '://') and contains(., '://')
and not(starts-with(., 'http'))
and not(starts-with(., 'ftp'))
]).freeze ]).freeze
def call def call
return doc if context[:autolink] == false return doc if context[:autolink] == false
rinku_parse
text_parse text_parse
end end
private private
# Run the text through Rinku as a first pass
#
# This will quickly autolink http(s) and ftp links.
#
# `@doc` will be re-parsed with the HTML String from Rinku.
def rinku_parse
# Convert the options from a Hash to a String that Rinku expects
options = tag_options(link_options)
# NOTE: We don't parse email links because it will erroneously match
# external Commit and CommitRange references.
#
# The final argument tells Rinku to link short URLs that don't include a
# period (e.g., http://localhost:3000/)
rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
return if rinku == html
# Rinku returns a String, so parse it back to a Nokogiri::XML::Document
# for further processing.
@doc = parse_html(rinku)
end
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
def contains_unsafe?(scheme) def contains_unsafe?(scheme)
return false unless scheme return false unless scheme
...@@ -79,8 +53,6 @@ module Banzai ...@@ -79,8 +53,6 @@ module Banzai
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
end end
# Autolinks any text matching LINK_PATTERN that Rinku didn't already
# replace
def text_parse def text_parse
doc.xpath(TEXT_QUERY).each do |node| doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html content = node.to_html
...@@ -113,11 +85,13 @@ module Banzai ...@@ -113,11 +85,13 @@ module Banzai
dropped = ($1 || '').html_safe dropped = ($1 || '').html_safe
options = link_options.merge(href: match) options = link_options.merge(href: match)
content_tag(:a, match, options) + dropped content_tag(:a, match.html_safe, options) + dropped
end end
def autolink_filter(text) def autolink_filter(text)
text.gsub(LINK_PATTERN) { |match| autolink_match(match) } Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
autolink_match(link)
end
end end
def link_options def link_options
......
...@@ -14,7 +14,7 @@ module Gitlab ...@@ -14,7 +14,7 @@ module Gitlab
end end
def mark(marker_ranges) def mark(marker_ranges)
return rich_line unless marker_ranges return rich_line unless marker_ranges&.any?
if html_escaped if html_escaped
rich_marker_ranges = [] rich_marker_ranges = []
......
module Gitlab module Gitlab
class StringRegexMarker < StringRangeMarker class StringRegexMarker < StringRangeMarker
def mark(regex, group: 0, &block) def mark(regex, group: 0, &block)
regex_match = raw_line.match(regex) ranges = []
return rich_line unless regex_match
begin_index, end_index = regex_match.offset(group) raw_line.scan(regex) do
name_range = begin_index..(end_index - 1) begin_index, end_index = Regexp.last_match.offset(group)
super([name_range], &block) ranges << (begin_index..(end_index - 1))
end
super(ranges, &block)
end end
end end
end end
...@@ -25,7 +25,7 @@ describe Banzai::Filter::AutolinkFilter do ...@@ -25,7 +25,7 @@ describe Banzai::Filter::AutolinkFilter do
end end
end end
context 'Rinku schemes' do context 'Various schemes' do
it 'autolinks http' do it 'autolinks http' do
doc = filter("See #{link}") doc = filter("See #{link}")
expect(doc.at_css('a').text).to eq link expect(doc.at_css('a').text).to eq link
...@@ -56,33 +56,27 @@ describe Banzai::Filter::AutolinkFilter do ...@@ -56,33 +56,27 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['href']).to eq link expect(doc.at_css('a')['href']).to eq link
end end
it 'accepts link_attr options' do it 'autolinks multiple URLs' do
doc = filter("See #{link}", link_attr: { class: 'custom' }) link1 = 'http://localhost:3000/'
link2 = 'http://google.com/'
expect(doc.at_css('a')['class']).to eq 'custom' doc = filter("See #{link1} and #{link2}")
end
described_class::IGNORE_PARENTS.each do |elem| found_links = doc.css('a')
it "ignores valid links contained inside '#{elem}' element" do
exp = act = "<#{elem}>See #{link}</#{elem}>"
expect(filter(act).to_html).to eq exp
end
end
context 'when the input contains link' do expect(found_links.size).to eq(2)
it 'does parse_html back the rinku returned value' do expect(found_links[0].text).to eq(link1)
act = HTML::Pipeline.parse("<p>See #{link}</p>") expect(found_links[0]['href']).to eq(link1)
expect(found_links[1].text).to eq(link2)
expect(found_links[1]['href']).to eq(link2)
end
expect_any_instance_of(described_class).to receive(:parse_html).at_least(:once).and_call_original it 'accepts link_attr options' do
doc = filter("See #{link}", link_attr: { class: 'custom' })
filter(act).to_html expect(doc.at_css('a')['class']).to eq 'custom'
end
end
end end
context 'other schemes' do
let(:link) { 'foo://bar.baz/' }
it 'autolinks smb' do it 'autolinks smb' do
link = 'smb:///Volumes/shared/foo.pdf' link = 'smb:///Volumes/shared/foo.pdf'
doc = filter("See #{link}") doc = filter("See #{link}")
...@@ -91,6 +85,21 @@ describe Banzai::Filter::AutolinkFilter do ...@@ -91,6 +85,21 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['href']).to eq link expect(doc.at_css('a')['href']).to eq link
end end
it 'autolinks multiple occurences of smb' do
link1 = 'smb:///Volumes/shared/foo.pdf'
link2 = 'smb:///Volumes/shared/bar.pdf'
doc = filter("See #{link1} and #{link2}")
found_links = doc.css('a')
expect(found_links.size).to eq(2)
expect(found_links[0].text).to eq(link1)
expect(found_links[0]['href']).to eq(link1)
expect(found_links[1].text).to eq(link2)
expect(found_links[1]['href']).to eq(link2)
end
it 'autolinks irc' do it 'autolinks irc' do
link = 'irc://irc.freenode.net/git' link = 'irc://irc.freenode.net/git'
doc = filter("See #{link}") doc = filter("See #{link}")
...@@ -151,4 +160,18 @@ describe Banzai::Filter::AutolinkFilter do ...@@ -151,4 +160,18 @@ describe Banzai::Filter::AutolinkFilter do
end end
end end
end end
context 'when the link is inside a tag' do
it 'renders text after the link correctly for http' do
doc = filter(ERB::Util.html_escape_once("<http://link><another>"))
expect(doc.children.last.text).to include('<another>')
end
it 'renders text after the link correctly for not other protocol' do
doc = filter(ERB::Util.html_escape_once("<rdar://link><another>"))
expect(doc.children.last.text).to include('<another>')
end
end
end end
...@@ -2,17 +2,36 @@ require 'spec_helper' ...@@ -2,17 +2,36 @@ require 'spec_helper'
describe Gitlab::StringRegexMarker do describe Gitlab::StringRegexMarker do
describe '#mark' do describe '#mark' do
context 'with a single occurrence' do
let(:raw) { %{"name": "AFNetworking"} } let(:raw) { %{"name": "AFNetworking"} }
let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe } let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe }
subject do subject do
described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:| described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:|
%{<a href="#">#{text}</a>} %{<a href="#">#{text}</a>}
end end
end end
it 'marks the inline diffs' do it 'marks the match' do
expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>}) expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>})
expect(subject).to be_html_safe expect(subject).to be_html_safe
end end
end end
context 'with multiple occurrences' do
let(:raw) { %{a <b> <c> d} }
let(:rich) { %{a &lt;b&gt; &lt;c&gt; d}.html_safe }
subject do
described_class.new(raw, rich).mark(/<[a-z]>/) do |text, left:, right:|
%{<strong>#{text}</strong>}
end
end
it 'marks the matches' do
expect(subject).to eq(%{a <strong>&lt;b&gt;</strong> <strong>&lt;c&gt;</strong> d})
expect(subject).to be_html_safe
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment