# path_regex.rb
module Gitlab
  module PathRegex
    extend self

    # All routes that appear on the top level must be listed here.
    # This makes sure that groups cannot be created with these names,
    # as such groups would be masked by the routes already in place.
    #
    # Example:
    #   /api/api-project
    #
    #   The path `api` shouldn't be allowed because it would be masked by `api/*`.
    #
    TOP_LEVEL_ROUTES = %w[
      -
      .well-known
      404.html
      422.html
      500.html
      502.html
      503.html
      abuse_reports
      admin
      api
      apple-touch-icon-precomposed.png
      apple-touch-icon.png
      assets
      autocomplete
      ci
      dashboard
      deploy.html
      explore
      favicon.ico
      files
      groups
      health_check
      help
      import
      invites
      jwt
      koding
      notification_settings
      oauth
      profile
      projects
      public
      robots.txt
      s
      search
      sent_notifications
      slash-command-logo.png
      snippets
      u
      unsubscribes
      uploads
      users
    ].freeze

    # This list should contain all words following `/*namespace_id/:project_id` in
    # routes that contain a second wildcard.
    #
    # Example:
    #   /*namespace_id/:project_id/badges/*ref/build
    #
    # If `badges` was allowed as a project/group name, we would not be able to access the
    # `badges` route for those projects.
    #
    # Consider a namespace with path `foo/bar` and a project called `badges`.
    # The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`.
    #
    # When accessing this path, the route would be matched to the `badges` path
    # with the following params:
    #   - namespace_id: `foo`
    #   - project_id: `bar`
    #   - ref: `badges/master`
    #
    # Failing to find the project, this would result in a 404.
    #
    # By rejecting `badges`, the router can _count_ on the fact that `badges` will
    # be preceded by the `namespace/project`.
    PROJECT_WILDCARD_ROUTES = %w[
      -
      badges
      blame
      blob
      builds
      commits
      create
      create_dir
      edit
      environments/folders
      files
      find_file
      gitlab-lfs/objects
      info/lfs/objects
      new
      preview
      raw
      refs
      tree
      update
      wikis
    ].freeze

    # These are all the paths that follow `/groups/*id/` or `/groups/*group_id`.
    # We need to reject these because we have a `/groups/*id` page that is the same
    # as the `/*id`.
    #
    # If we would allow a subgroup to be created with the name `activity` then
    # this group would not be accessible through `/groups/parent/activity` since
    # this would map to the activity-page of its parent.
    GROUP_ROUTES = %w[
      -
    ].freeze

    # Words that may not be used as a project path: exactly the project wildcard routes.
    ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES
    # Words that may not be used as a (sub)group path: the project wildcard routes
    # followed by the group routes, with duplicates removed.
    ILLEGAL_GROUP_PATH_WORDS = (PROJECT_WILDCARD_ROUTES + GROUP_ROUTES).uniq.freeze

    # The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, JavaScript
    # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
    # Since this is a non-trivial problem to solve in JavaScript (heavily complicating the regex, modifying view code
    # to allow non-regex validations, etc.), `NAMESPACE_FORMAT_REGEX_JS` serves as a JavaScript-compatible version of
    # `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed. This means that the client-side
    # validation will pass for usernames ending in `.atom` and `.git`, but these will be caught by the server-side
    # validation.
    PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze
    # Parenthesized so that `freeze` applies to the concatenated result rather than
    # only to the right-hand literal.
    NAMESPACE_FORMAT_REGEX_JS = (PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]').freeze

    # Zero-width assertion: the text matched so far must not end in `.git` or `.atom`.
    NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze
    # A single namespace path segment (cannot end in `.`, `.git` or `.atom`).
    NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze
    # A single project path segment (may end in `.`, but not in `.git` or `.atom`).
    PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze
    # One or more namespace segments separated by `/`.
    FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/)*#{NAMESPACE_FORMAT_REGEX}}.freeze

    # Unanchored route pattern for a root (top-level) namespace path.
    #
    # A negative lookahead rejects any word from TOP_LEVEL_ROUTES (compared
    # case-insensitively) when it is immediately followed by `/`; the rest must
    # satisfy NAMESPACE_FORMAT_REGEX. The result is memoized.
    def root_namespace_route_regex
      return @root_namespace_route_regex if @root_namespace_route_regex

      reserved_source = Regexp.union(TOP_LEVEL_ROUTES).source
      reserved_words = Regexp.new(reserved_source, Regexp::IGNORECASE)

      @root_namespace_route_regex = single_line_regexp(%r{
        (?!(#{reserved_words})/)
        #{NAMESPACE_FORMAT_REGEX}
      }x)
    end

    # Unanchored route pattern for a full (possibly nested) namespace path.
    #
    # Starts with a root namespace segment, followed by any number of
    # `/segment` parts. Each nested segment must satisfy NAMESPACE_FORMAT_REGEX
    # and must not be a word from ILLEGAL_GROUP_PATH_WORDS (compared
    # case-insensitively) immediately followed by `/`. The result is memoized.
    def full_namespace_route_regex
      return @full_namespace_route_regex if @full_namespace_route_regex

      reserved_source = Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source
      reserved_words = Regexp.new(reserved_source, Regexp::IGNORECASE)

      @full_namespace_route_regex = single_line_regexp(%r{
        #{root_namespace_route_regex}
        (?:
          /
          (?!#{reserved_words}/)
          #{NAMESPACE_FORMAT_REGEX}
        )*
      }x)
    end

    # Unanchored route pattern for a project path segment.
    #
    # Rejects any word from ILLEGAL_PROJECT_PATH_WORDS (compared
    # case-insensitively) when immediately followed by `/`; the rest must
    # satisfy PROJECT_PATH_FORMAT_REGEX. The result is memoized.
    def project_route_regex
      return @project_route_regex if @project_route_regex

      reserved_source = Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source
      reserved_words = Regexp.new(reserved_source, Regexp::IGNORECASE)

      @project_route_regex = single_line_regexp(%r{
        (?!(#{reserved_words})/)
        #{PROJECT_PATH_FORMAT_REGEX}
      }x)
    end

    # Unanchored route pattern for a project path followed by a literal `.git`.
    # The result is memoized and frozen.
    def project_git_route_regex
      return @project_git_route_regex if @project_git_route_regex

      @project_git_route_regex = /#{project_route_regex}\.git/.freeze
    end

    # Whole-string pattern for a root namespace path with a trailing `/`.
    # The trailing slash is required because the route regex's reserved-word
    # lookahead only fires on `word/`. The result is memoized.
    def root_namespace_path_regex
      return @root_namespace_path_regex if @root_namespace_path_regex

      @root_namespace_path_regex = %r{\A#{root_namespace_route_regex}/\z}
    end

    # Whole-string pattern for a full (possibly nested) namespace path with a
    # trailing `/`. The result is memoized.
    def full_namespace_path_regex
      return @full_namespace_path_regex if @full_namespace_path_regex

      @full_namespace_path_regex = %r{\A#{full_namespace_route_regex}/\z}
    end

    # Whole-string pattern for a single project path segment with a trailing `/`.
    # The result is memoized.
    def project_path_regex
      return @project_path_regex if @project_path_regex

      @project_path_regex = %r{\A#{project_route_regex}/\z}
    end

    # Whole-string pattern for `<full namespace>/<project>/` (trailing `/`
    # included). The result is memoized.
    def full_project_path_regex
      return @full_project_path_regex if @full_project_path_regex

      @full_project_path_regex =
        %r{\A#{full_namespace_route_regex}/#{project_route_regex}/\z}
    end

    # Whole-string pattern validating a full (possibly nested) namespace path,
    # i.e. FULL_NAMESPACE_FORMAT_REGEX anchored with `\A` and `\z`.
    #
    # Memoized in its own instance variable so that it cannot be confused with
    # the single-segment regex returned by `namespace_format_regex`.
    def full_namespace_format_regex
      @full_namespace_format_regex ||= /\A#{FULL_NAMESPACE_FORMAT_REGEX}\z/.freeze
    end

    # Whole-string pattern validating a single namespace path segment
    # (NAMESPACE_FORMAT_REGEX anchored with `\A` and `\z`). Memoized and frozen.
    def namespace_format_regex
      return @namespace_format_regex if @namespace_format_regex

      @namespace_format_regex = /\A#{NAMESPACE_FORMAT_REGEX}\z/.freeze
    end

    # Human-readable description of the namespace path format rules.
    #
    # @return [String]
    def namespace_format_message
      "can contain only letters, digits, '_', '-' and '.'. " \
      "Cannot start with '-' or end in '.', '.git' or '.atom'."
    end

    # Whole-string pattern validating a single project path segment
    # (PROJECT_PATH_FORMAT_REGEX anchored with `\A` and `\z`). Memoized and frozen.
    def project_path_format_regex
      return @project_path_format_regex if @project_path_format_regex

      @project_path_format_regex = /\A#{PROJECT_PATH_FORMAT_REGEX}\z/.freeze
    end

    # Human-readable description of the project path format rules.
    #
    # @return [String]
    def project_path_format_message
      "can contain only letters, digits, '_', '-' and '.'. " \
      "Cannot start with '-', end in '.git' or end in '.atom'"
    end

    # Pattern capturing one of the listed archive extensions:
    # zip, tar, tar.gz (tgz, gz) and tar.bz2 (tbz, tbz2, tb2, bz2).
    # The pattern is unanchored; the result is memoized and frozen.
    def archive_formats_regex
      return @archive_formats_regex if @archive_formats_regex

      @archive_formats_regex = /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze
    end

    # Pattern for a valid git ref name, see:
    # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
    #
    # The pattern is written in extended form with `(?# ...)` comment groups
    # and collapsed to a single line by `single_line_regexp`. The result is
    # memoized.
    def git_reference_regex
      return @git_reference_regex if @git_reference_regex

      @git_reference_regex = single_line_regexp(%r{
        (?!
           (?# doesn't begins with)
           \/|                    (?# rule #6)
           (?# doesn't contain)
           .*(?:
              [\/.]\.|            (?# rule #1,3)
              \/\/|               (?# rule #6)
              @\{|                (?# rule #8)
              \\                  (?# rule #9)
           )
        )
        [^\000-\040\177~^:?*\[]+  (?# rule #4-5)
        (?# doesn't end with)
        (?<!\.lock)               (?# rule #1)
        (?<![\/.])                (?# rule #6-7)
      }x)
    end

    private

    # Turns a multiline extended (`x`) regexp into a single-line one,
    # because `rake routes` breaks on multiline regexes.
    #
    # `(?#...)` comment groups (including empty ones) and ALL whitespace are
    # removed from the source, so patterns passed in must not rely on literal
    # whitespace. Free-spacing `# ...` line comments are not stripped.
    #
    # @param regex [Regexp] the regexp to collapse
    # @return [Regexp] a frozen regexp with the same options minus EXTENDED
    def single_line_regexp(regex)
      source = regex.source.gsub(/\(\?#.*?\)/, '').gsub(/\s+/, '')

      # Mask the flag off rather than XOR-ing it: XOR would *set* EXTENDED
      # when the incoming regexp does not have it.
      Regexp.new(source, regex.options & ~Regexp::EXTENDED).freeze
    end
  end
end