Commit b22287f0 authored by Kamil Trzciński's avatar Kamil Trzciński

Make CI refs matching to to use UntrustedRegexp

This makes ref validation to use always `UntrustedRegexp`.
This also splits the existing RubySyntax into separate
class.
parent 80fea82f
---
title: Use UntrustedRegexp for matching refs policy
merge_request:
author:
type: security
......@@ -255,6 +255,19 @@ job:
- branches
```
Pattern matching is case-sensitive by default. Use `i` flag modifier, like
`/pattern/i` to make a pattern case-insensitive:
```yaml
job:
# use regexp
only:
- /^issue-.*$/i
# use special keyword
except:
- branches
```
In this example, `job` will run only for refs that are tagged, or if a build is
explicitly requested via an API trigger or a [Pipeline Schedule][schedules]:
......
......@@ -35,8 +35,8 @@ module Gitlab
# patterns can be matched only when branch or tag is used
# the pattern matching does not work for merge requests pipelines
if pipeline.branch? || pipeline.tag?
if pattern.first == "/" && pattern.last == "/"
Regexp.new(pattern[1...-1]) =~ pipeline.ref
if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern)
regexp.match?(pipeline.ref)
else
pattern == pipeline.ref
end
......
......@@ -13,13 +13,13 @@ module Gitlab
def initialize(regexp)
@value = regexp
unless Gitlab::UntrustedRegexp.valid?(@value)
unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value)
raise Lexer::SyntaxError, 'Invalid regular expression!'
end
end
def evaluate(variables = {})
Gitlab::UntrustedRegexp.fabricate(@value)
Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value)
rescue RegexpError
raise Expression::RuntimeError, 'Invalid regular expression!'
end
......
......@@ -45,17 +45,15 @@ module Gitlab
end
def validate_regexp(value)
!value.nil? && Regexp.new(value.to_s) && true
rescue RegexpError, TypeError
false
Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
def validate_string_or_regexp(value)
return true if value.is_a?(Symbol)
return false unless value.is_a?(String)
if value.first == '/' && value.last == '/'
validate_regexp(value[1...-1])
if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
validate_regexp(value)
else
true
end
......
......@@ -120,17 +120,13 @@ module Gitlab
private
def look_like_regexp?(value)
value.is_a?(String) && value.start_with?('/') &&
value.end_with?('/')
def matches_syntax?(value)
Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
end
def validate_regexp(value)
look_like_regexp?(value) &&
Regexp.new(value.to_s[1...-1]) &&
true
rescue RegexpError
false
matches_syntax?(value) &&
Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
end
......@@ -149,7 +145,7 @@ module Gitlab
def validate_string_or_regexp(value)
return false unless value.is_a?(String)
return validate_regexp(value) if look_like_regexp?(value)
return validate_regexp(value) if matches_syntax?(value)
true
end
......
......@@ -35,6 +35,10 @@ module Gitlab
matches
end
def match?(text)
text.present? && scan(text).present?
end
def replace(text, rewrite)
RE2.Replace(text, regexp, rewrite)
end
......@@ -43,37 +47,6 @@ module Gitlab
self.source == other.source
end
# Handles regular expressions with the preferred RE2 library where possible
# via UntustedRegex. Falls back to Ruby's built-in regular expression library
# when the syntax would be invalid in RE2.
#
# One difference between these is `(?m)` multi-line mode. Ruby regex enables
# this by default, but also handles `^` and `$` differently.
# See: https://www.regular-expressions.info/modifiers.html
def self.with_fallback(pattern, multiline: false)
UntrustedRegexp.new(pattern, multiline: multiline)
rescue RegexpError
Regexp.new(pattern)
end
def self.valid?(pattern)
!!self.fabricate(pattern)
rescue RegexpError
false
end
def self.fabricate(pattern)
matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$})
raise RegexpError, 'Invalid regular expression!' if matches.nil?
expression = matches[:regexp]
flags = matches[:flags]
expression.prepend("(?#{flags})") if flags.present?
self.new(expression, multiline: false)
end
private
attr_reader :regexp
......
# frozen_string_literal: true
module Gitlab
class UntrustedRegexp
# This class implements support for Ruby syntax of regexps
# and converts that to RE2 representation:
# /<regexp>/<flags>
class RubySyntax
PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze
# Checks if pattern matches a regexp pattern
# but does not enforce it's validity
def self.matches_syntax?(pattern)
pattern.is_a?(String) && pattern.match(PATTERN).present?
end
# The regexp can match the pattern `/.../`, but may not be fabricatable:
# it can be invalid or incomplete: `/match ( string/`
def self.valid?(pattern)
!!self.fabricate(pattern)
end
def self.fabricate(pattern)
self.fabricate!(pattern)
rescue RegexpError
nil
end
def self.fabricate!(pattern)
raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String)
matches = pattern.match(PATTERN)
raise RegexpError, 'Invalid regular expression!' if matches.nil?
expression = matches[:regexp]
flags = matches[:flags]
expression.prepend("(?#{flags})") if flags.present?
UntrustedRegexp.new(expression, multiline: false)
end
end
end
end
......@@ -92,10 +92,23 @@ describe Gitlab::Ci::Build::Policy::Refs do
.to be_satisfied_by(pipeline)
end
it 'is satisfied when case-insensitive regexp matches pipeline ref' do
expect(described_class.new(['/DOCS-.*/i']))
.to be_satisfied_by(pipeline)
end
it 'is not satisfied when regexp does not match pipeline ref' do
expect(described_class.new(['/fix-.*/']))
.not_to be_satisfied_by(pipeline)
end
end
context 'malicious regexp' do
let(:pipeline) { build_stubbed(:ci_pipeline, ref: malicious_text) }
subject { described_class.new([malicious_regexp_ruby]) }
include_examples 'malicious regexp'
end
end
end
......@@ -85,7 +85,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do
end
it 'raises error if evaluated regexp is not valid' do
allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true)
allow(Gitlab::UntrustedRegexp::RubySyntax).to receive(:valid?).and_return(true)
regexp = described_class.new('/invalid ( .*/')
......
......@@ -414,7 +414,7 @@ describe Gitlab::Ci::Trace::Stream, :clean_gitlab_redis_cache do
context 'malicious regexp' do
let(:data) { malicious_text }
let(:regex) { malicious_regexp }
let(:regex) { malicious_regexp_re2 }
include_examples 'malicious regexp'
end
......
......@@ -60,7 +60,7 @@ describe Gitlab::RouteMap do
subject do
map = described_class.new(<<-"MAP".strip_heredoc)
- source: '#{malicious_regexp}'
- source: '#{malicious_regexp_re2}'
public: '/'
MAP
......
require 'fast_spec_helper'
require 'support/shared_examples/malicious_regexp_shared_examples'
describe Gitlab::UntrustedRegexp::RubySyntax do
describe '.matches_syntax?' do
it 'returns true if regexp is valid' do
expect(described_class.matches_syntax?('/some .* thing/'))
.to be true
end
it 'returns true if regexp is invalid, but resembles regexp' do
expect(described_class.matches_syntax?('/some ( thing/'))
.to be true
end
end
describe '.valid?' do
it 'returns true if regexp is valid' do
expect(described_class.valid?('/some .* thing/'))
.to be true
end
it 'returns false if regexp is invalid' do
expect(described_class.valid?('/some ( thing/'))
.to be false
end
end
describe '.fabricate' do
context 'when regexp is valid' do
it 'fabricates regexp without flags' do
expect(described_class.fabricate('/some .* thing/')).not_to be_nil
end
end
context 'when regexp is a raw pattern' do
it 'returns error' do
expect(described_class.fabricate('some .* thing')).to be_nil
end
end
end
describe '.fabricate!' do
context 'when regexp is using /regexp/ scheme with flags' do
it 'fabricates regexp with a single flag' do
regexp = described_class.fabricate!('/something/i')
expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?i)something')
expect(regexp.scan('SOMETHING')).to be_one
end
it 'fabricates regexp with multiple flags' do
regexp = described_class.fabricate!('/something/im')
expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?im)something')
end
it 'fabricates regexp without flags' do
regexp = described_class.fabricate!('/something/')
expect(regexp).to eq Gitlab::UntrustedRegexp.new('something')
end
end
context 'when regexp is a raw pattern' do
it 'raises an error' do
expect { described_class.fabricate!('some .* thing') }
.to raise_error(RegexpError)
end
end
end
end
......@@ -2,48 +2,6 @@ require 'fast_spec_helper'
require 'support/shared_examples/malicious_regexp_shared_examples'
describe Gitlab::UntrustedRegexp do
describe '.valid?' do
it 'returns true if regexp is valid' do
expect(described_class.valid?('/some ( thing/'))
.to be false
end
it 'returns true if regexp is invalid' do
expect(described_class.valid?('/some .* thing/'))
.to be true
end
end
describe '.fabricate' do
context 'when regexp is using /regexp/ scheme with flags' do
it 'fabricates regexp with a single flag' do
regexp = described_class.fabricate('/something/i')
expect(regexp).to eq described_class.new('(?i)something')
expect(regexp.scan('SOMETHING')).to be_one
end
it 'fabricates regexp with multiple flags' do
regexp = described_class.fabricate('/something/im')
expect(regexp).to eq described_class.new('(?im)something')
end
it 'fabricates regexp without flags' do
regexp = described_class.fabricate('/something/')
expect(regexp).to eq described_class.new('something')
end
end
context 'when regexp is a raw pattern' do
it 'raises an error' do
expect { described_class.fabricate('some .* thing') }
.to raise_error(RegexpError)
end
end
end
describe '#initialize' do
subject { described_class.new(pattern) }
......@@ -92,11 +50,41 @@ describe Gitlab::UntrustedRegexp do
end
end
describe '#match?' do
subject { described_class.new(regexp).match?(text) }
context 'malicious regexp' do
let(:text) { malicious_text }
let(:regexp) { malicious_regexp_re2 }
include_examples 'malicious regexp'
end
context 'matching regexp' do
let(:regexp) { 'foo' }
let(:text) { 'foo' }
it 'returns an array of nil matches' do
is_expected.to eq(true)
end
end
context 'non-matching regexp' do
let(:regexp) { 'boo' }
let(:text) { 'foo' }
it 'returns an array of nil matches' do
is_expected.to eq(false)
end
end
end
describe '#scan' do
subject { described_class.new(regexp).scan(text) }
context 'malicious regexp' do
let(:text) { malicious_text }
let(:regexp) { malicious_regexp }
let(:regexp) { malicious_regexp_re2 }
include_examples 'malicious regexp'
end
......
......@@ -2,7 +2,8 @@ require 'timeout'
shared_examples 'malicious regexp' do
let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
let(:malicious_regexp_re2) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
let(:malicious_regexp_ruby) { '/^(([a-z])+.)+[A-Z]([a-z])+$/i' }
it 'takes under a second' do
expect { Timeout.timeout(1) { subject } }.not_to raise_error
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment