Commit 1c375330 authored by Jan Provaznik's avatar Jan Provaznik

Re-escape whole HTML content instead of only match

When we un-escape HTML text to find references in it, we should then
re-escape the whole text again, not only found matches.

Because we replace matches with milestone/label links (which contain
HTML tags we don't want to escape again), we re-escape HTML text
with placeholders instead of these links and then replace placeholders
in the escaped text.
parent 5c1b8d03
---
title: Make sure HTML text is always escaped when replacing label/milestone references.
merge_request:
author:
type: security
...@@ -7,6 +7,14 @@ module Banzai ...@@ -7,6 +7,14 @@ module Banzai
class AbstractReferenceFilter < ReferenceFilter class AbstractReferenceFilter < ReferenceFilter
include CrossProjectReference include CrossProjectReference
# REFERENCE_PLACEHOLDER is used for re-escaping HTML text except found
# reference (which we replace with placeholder during re-scaping). The
# random number helps ensure it's pretty close to unique. Since it's a
# transitory value (it never gets saved) we can initialize once, and it
# doesn't matter if it changes on a restart.
REFERENCE_PLACEHOLDER = "_reference_#{SecureRandom.hex(16)}_"
REFERENCE_PLACEHOLDER_PATTERN = %r{#{REFERENCE_PLACEHOLDER}(\d+)}.freeze
def self.object_class def self.object_class
# Implement in child class # Implement in child class
# Example: MergeRequest # Example: MergeRequest
...@@ -389,6 +397,14 @@ module Banzai ...@@ -389,6 +397,14 @@ module Banzai
def escape_html_entities(text) def escape_html_entities(text)
CGI.escapeHTML(text.to_s) CGI.escapeHTML(text.to_s)
end end
def escape_with_placeholders(text, placeholder_data)
escaped = escape_html_entities(text)
escaped.gsub(REFERENCE_PLACEHOLDER_PATTERN) do |match|
placeholder_data[$1.to_i]
end
end
end end
end end
end end
......
...@@ -14,24 +14,24 @@ module Banzai ...@@ -14,24 +14,24 @@ module Banzai
find_labels(parent_object).find(id) find_labels(parent_object).find(id)
end end
def self.references_in(text, pattern = Label.reference_pattern)
unescape_html_entities(text).gsub(pattern) do |match|
yield match, $~[:label_id].to_i, $~[:label_name], $~[:project], $~[:namespace], $~
end
end
def references_in(text, pattern = Label.reference_pattern) def references_in(text, pattern = Label.reference_pattern)
unescape_html_entities(text).gsub(pattern) do |match| labels = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
namespace, project = $~[:namespace], $~[:project] namespace, project = $~[:namespace], $~[:project]
project_path = full_project_path(namespace, project) project_path = full_project_path(namespace, project)
label = find_label(project_path, $~[:label_id], $~[:label_name]) label = find_label(project_path, $~[:label_id], $~[:label_name])
if label if label
yield match, label.id, project, namespace, $~ labels[label.id] = yield match, label.id, project, namespace, $~
"#{REFERENCE_PLACEHOLDER}#{label.id}"
else else
escape_html_entities(match) match
end end
end end
return text if labels.empty?
escape_with_placeholders(unescaped_html, labels)
end end
def find_label(parent_ref, label_id, label_name) def find_label(parent_ref, label_id, label_name)
......
...@@ -51,15 +51,21 @@ module Banzai ...@@ -51,15 +51,21 @@ module Banzai
# default implementation. # default implementation.
return super(text, pattern) if pattern != Milestone.reference_pattern return super(text, pattern) if pattern != Milestone.reference_pattern
unescape_html_entities(text).gsub(pattern) do |match| milestones = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
milestone = find_milestone($~[:project], $~[:namespace], $~[:milestone_iid], $~[:milestone_name]) milestone = find_milestone($~[:project], $~[:namespace], $~[:milestone_iid], $~[:milestone_name])
if milestone if milestone
yield match, milestone.id, $~[:project], $~[:namespace], $~ milestones[milestone.id] = yield match, milestone.id, $~[:project], $~[:namespace], $~
"#{REFERENCE_PLACEHOLDER}#{milestone.id}"
else else
escape_html_entities(match) match
end end
end end
return text if milestones.empty?
escape_with_placeholders(unescaped_html, milestones)
end end
def find_milestone(project_ref, namespace_ref, milestone_id, milestone_name) def find_milestone(project_ref, namespace_ref, milestone_id, milestone_name)
......
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
module Gitlab module Gitlab
module MarkdownCache module MarkdownCache
# Increment this number every time the renderer changes its output # Increment this number every time the renderer changes its output
CACHE_COMMONMARK_VERSION = 17
CACHE_COMMONMARK_VERSION_START = 10 CACHE_COMMONMARK_VERSION_START = 10
CACHE_COMMONMARK_VERSION = 16
BaseError = Class.new(StandardError) BaseError = Class.new(StandardError)
UnsupportedClassError = Class.new(BaseError) UnsupportedClassError = Class.new(BaseError)
......
...@@ -10,6 +10,11 @@ describe Banzai::Filter::LabelReferenceFilter do ...@@ -10,6 +10,11 @@ describe Banzai::Filter::LabelReferenceFilter do
let(:label) { create(:label, project: project) } let(:label) { create(:label, project: project) }
let(:reference) { label.to_reference } let(:reference) { label.to_reference }
it_behaves_like 'HTML text with references' do
let(:resource) { label }
let(:resource_text) { resource.title }
end
it 'requires project context' do it 'requires project context' do
expect { described_class.call('') }.to raise_error(ArgumentError, /:project/) expect { described_class.call('') }.to raise_error(ArgumentError, /:project/)
end end
......
...@@ -329,6 +329,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do ...@@ -329,6 +329,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do
it_behaves_like 'cross-project / same-namespace complete reference' it_behaves_like 'cross-project / same-namespace complete reference'
it_behaves_like 'cross project shorthand reference' it_behaves_like 'cross project shorthand reference'
it_behaves_like 'references with HTML entities' it_behaves_like 'references with HTML entities'
it_behaves_like 'HTML text with references' do
let(:resource) { milestone }
let(:resource_text) { "#{resource.class.reference_prefix}#{resource.title}" }
end
end end
shared_context 'group milestones' do shared_context 'group milestones' do
...@@ -340,6 +344,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do ...@@ -340,6 +344,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do
it_behaves_like 'String-based multi-word references in quotes' it_behaves_like 'String-based multi-word references in quotes'
it_behaves_like 'referencing a milestone in a link href' it_behaves_like 'referencing a milestone in a link href'
it_behaves_like 'references with HTML entities' it_behaves_like 'references with HTML entities'
it_behaves_like 'HTML text with references' do
let(:resource) { milestone }
let(:resource_text) { "#{resource.class.reference_prefix}#{resource.title}" }
end
it 'does not support references by IID' do it 'does not support references by IID' do
doc = reference_filter("See #{Milestone.reference_prefix}#{milestone.iid}") doc = reference_filter("See #{Milestone.reference_prefix}#{milestone.iid}")
......
# frozen_string_literal: true
RSpec.shared_examples 'HTML text with references' do
let(:markdown_prepend) { "&lt;img src=\"\" onerror=alert(`bug`)&gt;" }
it 'preserves escaped HTML text and adds valid references' do
reference = resource.to_reference(format: :name)
doc = reference_filter("#{markdown_prepend}#{reference}")
expect(doc.to_html).to start_with(markdown_prepend)
expect(doc.text).to eq %(<img src="" onerror=alert(`bug`)>#{resource_text})
end
it 'preserves escaped HTML text if there are no valid references' do
reference = "#{resource.class.reference_prefix}invalid"
text = "#{markdown_prepend}#{reference}"
doc = reference_filter(text)
expect(doc.to_html).to eq text
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment