diff --git a/changelogs/unreleased/20099-markdown-with-yaml-rendering-doesn-t-handle-utf8-bom.yml b/changelogs/unreleased/20099-markdown-with-yaml-rendering-doesn-t-handle-utf8-bom.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d6dec816abbe2cdee10166af6ea0fb7095c22c53
--- /dev/null
+++ b/changelogs/unreleased/20099-markdown-with-yaml-rendering-doesn-t-handle-utf8-bom.yml
@@ -0,0 +1,5 @@
+---
+title: Tolerate UTF8 BOM character during frontmatter rendering
+merge_request: 46062
+author:
+type: fixed
diff --git a/lib/banzai/filter/normalize_source_filter.rb b/lib/banzai/filter/normalize_source_filter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..975cd5408730f62255df0d644cad1c1241522ef5
--- /dev/null
+++ b/lib/banzai/filter/normalize_source_filter.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Banzai
+  module Filter
+    # Strips a UTF-8 byte order mark from the very start of the source text.
+    # Only a leading BOM is removed; occurrences elsewhere are left untouched.
+    class NormalizeSourceFilter < HTML::Pipeline::Filter
+      UTF8_BOM = "\xEF\xBB\xBF"
+
+      def call
+        # Remove UTF8_BOM from beginning of source text
+        html.delete_prefix(UTF8_BOM)
+      end
+    end
+  end
+end
diff --git a/lib/banzai/pipeline/pre_process_pipeline.rb b/lib/banzai/pipeline/pre_process_pipeline.rb
index 4c2b4ca16652f181fbdb3ea7419ebf2a9705c050..1f7cb437fcdbb11eec62d77906f302a3549d0c3c 100644
--- a/lib/banzai/pipeline/pre_process_pipeline.rb
+++ b/lib/banzai/pipeline/pre_process_pipeline.rb
@@ -5,6 +5,7 @@ module Banzai
     class PreProcessPipeline < BasePipeline
       def self.filters
         FilterArray[
+          Filter::NormalizeSourceFilter,
           Filter::FrontMatterFilter,
           Filter::BlockquoteFenceFilter,
         ]
diff --git a/spec/lib/banzai/filter/normalize_source_filter_spec.rb b/spec/lib/banzai/filter/normalize_source_filter_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..8eaeec0e7b00b1a9681d45dba1cdbbbcc66a290c
--- /dev/null
+++ b/spec/lib/banzai/filter/normalize_source_filter_spec.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Banzai::Filter::NormalizeSourceFilter do
+  include FilterSpecHelper
+
+  it 'removes the UTF8 BOM from the beginning of the text' do
+    content = "\xEF\xBB\xBF---"
+
+    output = filter(content)
+
+    # Compare exactly: `match` with a String is a substring search and would
+    # still pass if the BOM were left in place.
+    expect(output).to eq '---'
+  end
+
+  it 'does not remove those characters from anywhere else in the text' do
+    content = <<~MD
+      \xEF\xBB\xBF---
+      \xEF\xBB\xBF---
+    MD
+
+    output = filter(content)
+
+    # Exact comparison: only the leading BOM is gone, the second one remains.
+    expect(output).to eq "---\n\xEF\xBB\xBF---\n"
+  end
+end
diff --git a/spec/lib/banzai/pipeline/pre_process_pipeline_spec.rb b/spec/lib/banzai/pipeline/pre_process_pipeline_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..fc74c592867e1dbc8edca39bf8f7a588bf92406e
--- /dev/null
+++ b/spec/lib/banzai/pipeline/pre_process_pipeline_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Banzai::Pipeline::PreProcessPipeline do
+  it 'pre-processes the source text' do
+    markdown = <<~MD
+      \xEF\xBB\xBF---
+      foo: :foo_symbol
+      bar: :bar_symbol
+      ---
+
+      >>>
+      blockquote
+      >>>
+    MD
+
+    result = described_class.call(markdown, {})
+
+    aggregate_failures do
+      expect(result[:output]).not_to include "\xEF\xBB\xBF"
+      expect(result[:output]).not_to include '---'
+      expect(result[:output]).to include "```yaml\nfoo: :foo_symbol\n"
+      expect(result[:output]).to include "> blockquote\n"
+    end
+  end
+end