# frozen_string_literal: true

# Gitaly note: JV: seems to be completely migrated (behind feature flags).

Robert Speicher's avatar
Robert Speicher committed
5 6 7
module Gitlab
  module Git
    class Blob
8
      # Mixins. NOTE(review): #binary? falls back to `super` and the readers call
      # encode!, presumably supplied by BlobHelper / EncodingHelper respectively;
      # wrapped_gitaly_errors is used at class level, presumably from
      # WrapsGitalyErrors - confirm against those modules.
      include Gitlab::BlobHelper
      include Gitlab::EncodingHelper
      extend Gitlab::Git::WrapsGitalyErrors
Robert Speicher's avatar
Robert Speicher committed
11 12

      # This number is the maximum amount of data that we want to display to
      # the user. We load as much as we can for encoding detection and LFS
      # pointer parsing. All other cases where we need full blob data should
      # use load_all_data!.
      #
      # It is also the default byte limit for .find, .raw and .batch.
      MAX_DATA_DISPLAY_SIZE = 10.megabytes
Robert Speicher's avatar
Robert Speicher committed
17

18 19 20 21 22 23
      # These limits are used as a heuristic to ignore files which can't be LFS
      # pointers. Both bounds are inclusive (see size_could_be_lfs?, which checks
      # them with between?). The pointer format is described in
      # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
      LFS_POINTER_MIN_SIZE = 120.bytes
      LFS_POINTER_MAX_SIZE = 200.bytes

Robert Speicher's avatar
Robert Speicher committed
24 25 26
      # Raw attribute accessors. The readers for name, path and data are
      # redefined further down to pass the stored value through encode!, and
      # loaded_size is maintained by #initialize and #load_all_data!.
      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary

      class << self
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
        # Looks up the tree entry at +path+ in commit +sha+ through Gitaly's
        # CommitService and wraps it in a new instance.
        #
        # repository - repository handed to Gitlab::GitalyClient::CommitService
        # sha        - commit id; stored on the result as commit_id
        # path       - path of the entry; leading slashes are stripped and an
        #              empty path becomes '/'
        # limit      - maximum number of data bytes to load; 0 means "metadata
        #              only" (data is returned as an empty string)
        #
        # Returns nil when +path+ is nil, when Gitaly returns no entry, or when
        # the entry type is neither :COMMIT nor :BLOB.
        def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
          return unless path

          path = path.sub(%r{\A/*}, '')
          path = '/' if path.empty?
          name = File.basename(path)

          # Gitaly will think that setting the limit to 0 means unlimited, while
          # the client might only need the metadata and thus set the limit to 0.
          # In this method we'll then set the limit to 1, but clear the byte of data
          # that we got back so for the outside world it looks like the limit was
          # actually 0.
          metadata_only = limit == 0
          req_limit = metadata_only ? 1 : limit

          entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit)
          return unless entry

          entry.data = "" if metadata_only

          case entry.type
          when :COMMIT
            # A :COMMIT entry carries no content of its own: size 0, empty data.
            new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha)
          when :BLOB
            # The data is dup'ed so the instance does not share the entry's string.
            new(id: entry.oid, name: name, size: entry.size, data: entry.data.dup, mode: entry.mode.to_s(8),
                path: path, commit_id: sha, binary: binary?(entry.data))
          end
        end

        # Fetches the blob with object id +sha+ through the repository's Gitaly
        # blob client, loading at most MAX_DATA_DISPLAY_SIZE bytes of data.
        #
        # Returns whatever the client's get_blob returns.
        def raw(repository, sha)
          repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
        end
Robert Speicher's avatar
Robert Speicher committed
58

59
        # Returns an array of Blob instances, specified in blob_references as
        # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
        # full blob contents are returned. If blob_size_limit >= 0 then each blob will
        # contain no more than limit bytes in its data attribute.
        #
        # Keep in mind that this method may allocate a lot of memory. It is up
        # to the caller to limit the number of blobs and blob_size_limit.
        #
        # The client's result is materialised with to_a, so callers always get an
        # Array.
        def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
          repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a
        end

71 72 73 74 75 76
        # Metadata-only variant of .batch: the blobs are requested with a size
        # limit of zero bytes, so their data attribute carries no content.
        def batch_metadata(repository, blob_references)
          metadata_only_limit = 0

          batch(repository, blob_references, blob_size_limit: metadata_only_limit)
        end

77 78 79 80
        # Find LFS blobs given an array of sha ids
        # Returns array of Gitlab::Git::Blob
        # Does not guarantee blob data will be set
        #
        # blob_ids may be any object responding to to_a; it is converted to an
        # Array before being handed to the Gitaly blob client. The call runs
        # inside wrapped_gitaly_errors.
        def batch_lfs_pointers(repository, blob_ids)
          wrapped_gitaly_errors do
            repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
          end
        end

86
        # Class-level binary check on a raw data string; delegates to
        # EncodingHelper.detect_libgit2_binary? and returns its result.
        def binary?(data)
          EncodingHelper.detect_libgit2_binary?(data)
        end

90 91 92
        # True when +size+ lies within the inclusive range
        # LFS_POINTER_MIN_SIZE..LFS_POINTER_MAX_SIZE, i.e. the blob is small
        # enough (and large enough) to possibly be an LFS pointer file.
        def size_could_be_lfs?(size)
          lower_bound, upper_bound = LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE

          size.between?(lower_bound, upper_bound)
        end
Robert Speicher's avatar
Robert Speicher committed
93 94 95 96
      end

      # Builds a blob from an options hash with symbol keys :id, :name, :path,
      # :size, :data, :mode, :commit_id and :binary. Each value is assigned
      # through the matching attribute writer; missing keys assign nil.
      def initialize(options)
        %w(id name path size data mode commit_id binary).each do |key|
          self.__send__("#{key}=", options[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend
        end

        # Retain the actual size before it is encoded
        @loaded_size = @data.bytesize if @data
        # All data is considered loaded only when the bytes we hold match the
        # reported size; with no data, loaded_size stays nil and this is false
        # unless size is nil as well.
        @loaded_all_data = @loaded_size == size
      end

      # Whether the blob is binary. An explicitly assigned flag wins (only a
      # literal true counts as binary); when no flag has been set the answer is
      # delegated to the inherited implementation via super.
      def binary?
        return super if @binary.nil?

        @binary == true
      end

      # Reader for the blob contents: returns the result of running the stored
      # @data through encode!.
      def data
        encode!(@data)
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      #
      # repository - object whose gitaly_blob_client is used to fetch the blob
      #              by its id with no size limit (limit: -1).
      #
      # Does nothing for blobs whose data is the empty string. Otherwise the
      # cached binary flag is always cleared, and the data is fetched only if it
      # has not been fully loaded yet.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs

        # Even if we return early below, recalculate whether this blob is binary
        # in case a blob was initialized as text but the full data isn't
        @binary = nil

        return if @loaded_all_data

        @data = repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data
        @loaded_all_data = true
        @loaded_size = @data.bytesize
      end

      # Reader for the blob's file name: returns the result of running the
      # stored @name through encode!.
      def name
        encode!(@name)
      end

133 134 135 136
      # Reader for the blob's path: returns the result of running the stored
      # @path through encode!.
      def path
        encode!(@path)
      end

137 138 139 140
      # True when the blob's reported size is larger than the number of bytes
      # actually loaded.
      #
      # Returns false when either size or loaded_size is unknown (nil).
      # loaded_size is nil for blobs initialized without data, and comparing an
      # Integer with nil would raise ArgumentError.
      def truncated?
        return false unless size && loaded_size

        size > loaded_size
      end

Robert Speicher's avatar
Robert Speicher committed
141 142 143 144 145 146
      # Valid LFS object pointer is a text file consisting of
      # version
      # oid
      # size
      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      #
      # The size heuristic is checked first so that blobs outside the plausible
      # pointer size range are rejected before their data is inspected.
      def lfs_pointer?
        self.class.size_could_be_lfs?(size) &&
          has_lfs_version_key? &&
          lfs_oid.present? &&
          lfs_size.present?
      end

      # Extracts the 64-character lowercase hex digest that follows "sha256:" in
      # the blob data. Returns nil when the blob has no LFS version key or no
      # such digest is present.
      def lfs_oid
        return unless has_lfs_version_key?

        data[/(?<=sha256:)([0-9a-f]{64})/, 1]
      end

      # Extracts the number that follows "size " in the blob data and returns it
      # as an Integer. Returns nil when the blob has no LFS version key or no
      # size entry is present.
      def lfs_size
        return unless has_lfs_version_key?

        # Named size_match rather than size so the local does not shadow the
        # blob's own size attribute.
        size_match = data.match(/(?<=size )([0-9]+)/)
        size_match[1].to_i if size_match
      end

168 169 170 171
      # Identifies where the blob's real content is stored: :lfs when the blob
      # is an LFS pointer, nil otherwise.
      def external_storage
        :lfs if lfs_pointer?
      end

174 175
      # external_size is the size recorded in the LFS pointer (see #lfs_size);
      # nil when the blob has no LFS version key or no size entry.
      alias_method :external_size, :lfs_size

Robert Speicher's avatar
Robert Speicher committed
176 177 178 179 180 181 182 183
      private

      # Whether the blob is a non-empty text blob whose data begins with the
      # Git LFS spec version line prefix.
      def has_lfs_version_key?
        return false if empty?

        text? && data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end