Make CI refs matching to to use UntrustedRegexp

This makes ref validation to use always `UntrustedRegexp`. This also splits the existing RubySyntax into separate class.

Make CI refs matching to to use UntrustedRegexp
This makes ref validation to use always `UntrustedRegexp`. This also splits the existing RubySyntax into separate class.
b22287f0 · Kamil Trzciński · 80fea82f · b22287f0 · b22287f0 · b22287f0
Commit b22287f0 authored Feb 25, 2019 by Kamil Trzciński
15 changed files
--- a/changelogs/unreleased/use-untrusted-regexp.yml
+++ b/changelogs/unreleased/use-untrusted-regexp.yml
+---
+title: Use UntrustedRegexp for matching refs policy
+merge_request:
+author:
+type: security
--- a/doc/ci/yaml/README.md
+++ b/doc/ci/yaml/README.md
@@ -255,6 +255,19 @@ job:
    - branches
 ```

+Pattern matching is case-sensitive by default. Use `i` flag modifier, like
+`/pattern/i` to make a pattern case-insensitive:
+
+```yaml
+job:
+  # use regexp
+  only:
+    - /^issue-.*$/i
+  # use special keyword
+  except:
+    - branches
+```
+
 In this example, `job` will run only for refs that are tagged, or if a build is
 explicitly requested via an API trigger or a [Pipeline Schedule][schedules]:


--- a/lib/gitlab/ci/build/policy/refs.rb
+++ b/lib/gitlab/ci/build/policy/refs.rb
@@ -35,8 +35,8 @@ module Gitlab
            # patterns can be matched only when branch or tag is used
            # the pattern matching does not work for merge requests pipelines
            if pipeline.branch? || pipeline.tag?
-              if pattern.first == "/" && pattern.last == "/"
-                Regexp.new(pattern[1...-1]) =~ pipeline.ref
+              if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern)
+                regexp.match?(pipeline.ref)
              else
                pattern == pipeline.ref
              end

--- a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
+++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
@@ -13,13 +13,13 @@ module Gitlab
            def initialize(regexp)
              @value = regexp

-              unless Gitlab::UntrustedRegexp.valid?(@value)
+              unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value)
                raise Lexer::SyntaxError, 'Invalid regular expression!'
              end
            end

            def evaluate(variables = {})
-              Gitlab::UntrustedRegexp.fabricate(@value)
+              Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value)
            rescue RegexpError
              raise Expression::RuntimeError, 'Invalid regular expression!'
            end

--- a/lib/gitlab/config/entry/legacy_validation_helpers.rb
+++ b/lib/gitlab/config/entry/legacy_validation_helpers.rb
@@ -45,17 +45,15 @@ module Gitlab
        end

        def validate_regexp(value)
-          !value.nil? && Regexp.new(value.to_s) && true
-        rescue RegexpError, TypeError
-          false
+          Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
        end

        def validate_string_or_regexp(value)
          return true if value.is_a?(Symbol)
          return false unless value.is_a?(String)

-          if value.first == '/' && value.last == '/'
-            validate_regexp(value[1...-1])
+          if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
+            validate_regexp(value)
          else
            true
          end

--- a/lib/gitlab/config/entry/validators.rb
+++ b/lib/gitlab/config/entry/validators.rb
@@ -120,17 +120,13 @@ module Gitlab

          private

-          def look_like_regexp?(value)
-            value.is_a?(String) && value.start_with?('/') &&
-              value.end_with?('/')
+          def matches_syntax?(value)
+            Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
          end

          def validate_regexp(value)
-            look_like_regexp?(value) &&
-              Regexp.new(value.to_s[1...-1]) &&
-              true
-          rescue RegexpError
-            false
+            matches_syntax?(value) &&
+              Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
          end
        end

@@ -149,7 +145,7 @@ module Gitlab

          def validate_string_or_regexp(value)
            return false unless value.is_a?(String)
-            return validate_regexp(value) if look_like_regexp?(value)
+            return validate_regexp(value) if matches_syntax?(value)

            true
          end

--- a/lib/gitlab/untrusted_regexp.rb
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -35,6 +35,10 @@ module Gitlab
      matches
    end

+    def match?(text)
+      text.present? && scan(text).present?
+    end
+
    def replace(text, rewrite)
      RE2.Replace(text, regexp, rewrite)
    end
@@ -43,37 +47,6 @@ module Gitlab
      self.source == other.source
    end

-    # Handles regular expressions with the preferred RE2 library where possible
-    # via UntustedRegex. Falls back to Ruby's built-in regular expression library
-    # when the syntax would be invalid in RE2.
-    #
-    # One difference between these is `(?m)` multi-line mode. Ruby regex enables
-    # this by default, but also handles `^` and `$` differently.
-    # See: https://www.regular-expressions.info/modifiers.html
-    def self.with_fallback(pattern, multiline: false)
-      UntrustedRegexp.new(pattern, multiline: multiline)
-    rescue RegexpError
-      Regexp.new(pattern)
-    end
-
-    def self.valid?(pattern)
-      !!self.fabricate(pattern)
-    rescue RegexpError
-      false
-    end
-
-    def self.fabricate(pattern)
-      matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$})
-
-      raise RegexpError, 'Invalid regular expression!' if matches.nil?
-
-      expression = matches[:regexp]
-      flags = matches[:flags]
-      expression.prepend("(?#{flags})") if flags.present?
-
-      self.new(expression, multiline: false)
-    end
-
    private

    attr_reader :regexp

--- a/lib/gitlab/untrusted_regexp/ruby_syntax.rb
+++ b/lib/gitlab/untrusted_regexp/ruby_syntax.rb
+# frozen_string_literal: true
+
+module Gitlab
+  class UntrustedRegexp
+    # This class implements support for Ruby syntax of regexps
+    # and converts that to RE2 representation:
+    # /<regexp>/<flags>
+    class RubySyntax
+      PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze
+
+      # Checks if pattern matches a regexp pattern
+      # but does not enforce it's validity
+      def self.matches_syntax?(pattern)
+        pattern.is_a?(String) && pattern.match(PATTERN).present?
+      end
+
+      # The regexp can match the pattern `/.../`, but may not be fabricatable:
+      # it can be invalid or incomplete: `/match ( string/`
+      def self.valid?(pattern)
+        !!self.fabricate(pattern)
+      end
+
+      def self.fabricate(pattern)
+        self.fabricate!(pattern)
+      rescue RegexpError
+        nil
+      end
+
+      def self.fabricate!(pattern)
+        raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String)
+
+        matches = pattern.match(PATTERN)
+        raise RegexpError, 'Invalid regular expression!' if matches.nil?
+
+        expression = matches[:regexp]
+        flags = matches[:flags]
+        expression.prepend("(?#{flags})") if flags.present?
+
+        UntrustedRegexp.new(expression, multiline: false)
+      end
+    end
+  end
+end
--- a/spec/lib/gitlab/ci/build/policy/refs_spec.rb
+++ b/spec/lib/gitlab/ci/build/policy/refs_spec.rb
@@ -92,10 +92,23 @@ describe Gitlab::Ci::Build::Policy::Refs do
          .to be_satisfied_by(pipeline)
      end

+      it 'is satisfied when case-insensitive regexp matches pipeline ref' do
+        expect(described_class.new(['/DOCS-.*/i']))
+          .to be_satisfied_by(pipeline)
+      end
+
      it 'is not satisfied when regexp does not match pipeline ref' do
        expect(described_class.new(['/fix-.*/']))
          .not_to be_satisfied_by(pipeline)
      end
    end
+
+    context 'malicious regexp' do
+      let(:pipeline) { build_stubbed(:ci_pipeline, ref: malicious_text) }
+
+      subject { described_class.new([malicious_regexp_ruby]) }
+
+      include_examples 'malicious regexp'
+    end
  end
 end
--- a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb
+++ b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb
@@ -85,7 +85,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do
    end

    it 'raises error if evaluated regexp is not valid' do
-      allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true)
+      allow(Gitlab::UntrustedRegexp::RubySyntax).to receive(:valid?).and_return(true)

      regexp = described_class.new('/invalid ( .*/')


--- a/spec/lib/gitlab/ci/trace/stream_spec.rb
+++ b/spec/lib/gitlab/ci/trace/stream_spec.rb
@@ -414,7 +414,7 @@ describe Gitlab::Ci::Trace::Stream, :clean_gitlab_redis_cache do

      context 'malicious regexp' do
        let(:data) { malicious_text }
-        let(:regex) { malicious_regexp }
+        let(:regex) { malicious_regexp_re2 }

        include_examples 'malicious regexp'
      end

--- a/spec/lib/gitlab/route_map_spec.rb
+++ b/spec/lib/gitlab/route_map_spec.rb
@@ -60,7 +60,7 @@ describe Gitlab::RouteMap do

      subject do
        map = described_class.new(<<-"MAP".strip_heredoc)
-        - source: '#{malicious_regexp}'
+        - source: '#{malicious_regexp_re2}'
          public: '/'
        MAP


--- a/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb
+++ b/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb
+require 'fast_spec_helper'
+require 'support/shared_examples/malicious_regexp_shared_examples'
+
+describe Gitlab::UntrustedRegexp::RubySyntax do
+  describe '.matches_syntax?' do
+    it 'returns true if regexp is valid' do
+      expect(described_class.matches_syntax?('/some .* thing/'))
+        .to be true
+    end
+
+    it 'returns true if regexp is invalid, but resembles regexp' do
+      expect(described_class.matches_syntax?('/some ( thing/'))
+        .to be true
+    end
+  end
+
+  describe '.valid?' do
+    it 'returns true if regexp is valid' do
+      expect(described_class.valid?('/some .* thing/'))
+        .to be true
+    end
+
+    it 'returns false if regexp is invalid' do
+      expect(described_class.valid?('/some ( thing/'))
+        .to be false
+    end
+  end
+
+  describe '.fabricate' do
+    context 'when regexp is valid' do
+      it 'fabricates regexp without flags' do
+        expect(described_class.fabricate('/some .* thing/')).not_to be_nil
+      end
+    end
+
+    context 'when regexp is a raw pattern' do
+      it 'returns error' do
+        expect(described_class.fabricate('some .* thing')).to be_nil
+      end
+    end
+  end
+
+  describe '.fabricate!' do
+    context 'when regexp is using /regexp/ scheme with flags' do
+      it 'fabricates regexp with a single flag' do
+        regexp = described_class.fabricate!('/something/i')
+
+        expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?i)something')
+        expect(regexp.scan('SOMETHING')).to be_one
+      end
+
+      it 'fabricates regexp with multiple flags' do
+        regexp = described_class.fabricate!('/something/im')
+
+        expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?im)something')
+      end
+
+      it 'fabricates regexp without flags' do
+        regexp = described_class.fabricate!('/something/')
+
+        expect(regexp).to eq Gitlab::UntrustedRegexp.new('something')
+      end
+    end
+
+    context 'when regexp is a raw pattern' do
+      it 'raises an error' do
+        expect { described_class.fabricate!('some .* thing') }
+          .to raise_error(RegexpError)
+      end
+    end
+  end
+end
--- a/spec/lib/gitlab/untrusted_regexp_spec.rb
+++ b/spec/lib/gitlab/untrusted_regexp_spec.rb
@@ -2,48 +2,6 @@ require 'fast_spec_helper'
 require 'support/shared_examples/malicious_regexp_shared_examples'

 describe Gitlab::UntrustedRegexp do
-  describe '.valid?' do
-    it 'returns true if regexp is valid' do
-      expect(described_class.valid?('/some ( thing/'))
-        .to be false
-    end
-
-    it 'returns true if regexp is invalid' do
-      expect(described_class.valid?('/some .* thing/'))
-        .to be true
-    end
-  end
-
-  describe '.fabricate' do
-    context 'when regexp is using /regexp/ scheme with flags' do
-      it 'fabricates regexp with a single flag' do
-        regexp = described_class.fabricate('/something/i')
-
-        expect(regexp).to eq described_class.new('(?i)something')
-        expect(regexp.scan('SOMETHING')).to be_one
-      end
-
-      it 'fabricates regexp with multiple flags' do
-        regexp = described_class.fabricate('/something/im')
-
-        expect(regexp).to eq described_class.new('(?im)something')
-      end
-
-      it 'fabricates regexp without flags' do
-        regexp = described_class.fabricate('/something/')
-
-        expect(regexp).to eq described_class.new('something')
-      end
-    end
-
-    context 'when regexp is a raw pattern' do
-      it 'raises an error' do
-        expect { described_class.fabricate('some .* thing') }
-          .to raise_error(RegexpError)
-      end
-    end
-  end
-
  describe '#initialize' do
    subject { described_class.new(pattern) }

@@ -92,11 +50,41 @@ describe Gitlab::UntrustedRegexp do
    end
  end

+  describe '#match?' do
+    subject { described_class.new(regexp).match?(text) }
+
+    context 'malicious regexp' do
+      let(:text) { malicious_text }
+      let(:regexp) { malicious_regexp_re2 }
+
+      include_examples 'malicious regexp'
+    end
+
+    context 'matching regexp' do
+      let(:regexp) { 'foo' }
+      let(:text) { 'foo' }
+
+      it 'returns an array of nil matches' do
+        is_expected.to eq(true)
+      end
+    end
+
+    context 'non-matching regexp' do
+      let(:regexp) { 'boo' }
+      let(:text) { 'foo' }
+
+      it 'returns an array of nil matches' do
+        is_expected.to eq(false)
+      end
+    end
+  end
+
  describe '#scan' do
    subject { described_class.new(regexp).scan(text) }
+
    context 'malicious regexp' do
      let(:text) { malicious_text }
-      let(:regexp) { malicious_regexp }
+      let(:regexp) { malicious_regexp_re2 }

      include_examples 'malicious regexp'
    end

--- a/spec/support/shared_examples/malicious_regexp_shared_examples.rb
+++ b/spec/support/shared_examples/malicious_regexp_shared_examples.rb
@@ -2,7 +2,8 @@ require 'timeout'

 shared_examples 'malicious regexp' do
  let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
-  let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
+  let(:malicious_regexp_re2) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
+  let(:malicious_regexp_ruby) { '/^(([a-z])+.)+[A-Z]([a-z])+$/i' }

  it 'takes under a second' do
    expect { Timeout.timeout(1) { subject } }.not_to raise_error