importer.rb 13.9 KB
Newer Older
1 2
# frozen_string_literal: true

3 4 5
module Gitlab
  module BitbucketServerImport
    class Importer
6
      attr_reader :recover_missing_commits
      attr_reader :project, :project_key, :repository_slug, :client, :errors, :users
      attr_accessor :logger

      # Name of the git remote passed to `fetch_as_mirror` in #import_repository.
      REMOTE_NAME = 'bitbucket_server'.freeze
      # Number of pull requests processed per slice in #import_pull_requests.
      BATCH_SIZE = 100

      # Name/SHA pair describing a temporary branch created on the remote server.
      TempBranch = Struct.new(:name, :sha)

15 16 17 18 19 20 21 22
      # Always answers true.
      # NOTE(review): presumably consulted by the import framework to learn that
      # this importer fetches the git repository itself -- confirm against callers.
      #
      # @return [Boolean] true
      def self.imports_repository?
        true
      end

      # Ref mapping handed to `fetch_as_mirror` by #import_repository: heads,
      # tags, and a refspec mapping pull-request refs onto merge-request heads.
      #
      # @return [Array<Symbol, String>]
      def self.refmap
        pull_request_refspec = '+refs/pull-requests/*/to:refs/merge-requests/*/head'

        [:heads, :tags, pull_request_refspec]
      end

23 24 25 26 27 28
      # Unlike GitHub, you can't grab the commit SHAs for pull requests that
      # have been closed but not merged even though Bitbucket has these
      # commits internally. We can recover these pull requests by creating a
      # branch with the Bitbucket REST API, but by default we turn this
      # behavior off.
      # @param project the project being imported; its import_data supplies the
      #   'project_key' and 'repo_slug' entries and the client credentials
      # @param recover_missing_commits [Boolean] when true, #import_pull_requests
      #   calls #restore_branches for every batch before importing it
      def initialize(project, recover_missing_commits: false)
        import_data = project.import_data

        @project = project
        @recover_missing_commits = recover_missing_commits
        @project_key = import_data.data['project_key']
        @repository_slug = import_data.data['repo_slug']
        @client = BitbucketServer::Client.new(import_data.credentials)
        @formatter = Gitlab::ImportFormatter.new
        @errors = []         # error hashes collected while importing
        @users = {}          # email => user id (or nil) lookup cache
        @temp_branches = []  # TempBranch entries to clean up in #delete_temp_branches
        @logger = Gitlab::Import::Logger.build
      end

      # Runs the whole import: fetches the repository, imports pull requests,
      # removes temporary branches, records collected errors and logs completion.
      #
      # @return [Boolean] always true when no step raises
      def execute
        import_repository
        import_pull_requests
        delete_temp_branches
        handle_errors

        log_info(stage: "complete")

        true
      end

      private

      # Stores the collected errors as JSON in the import state's `last_error`
      # column. Does nothing when no errors were collected.
      def handle_errors
        return unless errors.any?

        payload = {
          message: 'The remote data could not be fully imported.',
          errors: errors
        }

        project.import_state.update_column(:last_error, payload.to_json)
      end

Stan Hu's avatar
Stan Hu committed
63
      # Resolves an email to a user id, falling back to the project creator
      # when #find_user_id returns nothing.
      #
      # @param email [String, nil]
      # @return the matched user's id, or project.creator_id
      def gitlab_user_id(email)
        find_user_id(email) || project.creator_id
      end

67
      # Looks up the id of the user owning the given email, passing
      # `confirmed: true` to User.find_by_any_email. Results are cached in
      # `users`, including misses (stored as nil), so each email is looked up
      # at most once.
      #
      # @param email [String, nil]
      # @return the user's id, or nil when email is nil or no user matched
      def find_user_id(email)
        return unless email
        return users[email] if users.key?(email)

        # Assignment returns the stored value, so hits and misses are both
        # cached and returned in one step.
        users[email] = User.find_by_any_email(email, confirmed: true)&.id
      end

      # Memoized result of `client.repo` for this project key and repository slug.
      def repo
        @repo ||= client.repo(project_key, repository_slug)
      end

82 83 84 85
      # Asks the project's repository for the commit with the given SHA.
      #
      # @param sha [String]
      # @return whatever `repository.commit` returns; callers in this class use
      #   it only for truthiness
      def sha_exists?(sha)
        repository = project.repository
        repository.commit(sha)
      end

86 87 88 89
      # Builds the name of a temporary branch for one side of a pull request.
      #
      # @param pull_request an object responding to #iid
      # @param suffix [Symbol, String] which side, e.g. :from or :to
      # @return [String] "gitlab/import/pull-request/<iid>/<suffix>"
      def temp_branch_name(pull_request, suffix)
        format('gitlab/import/pull-request/%s/%s', pull_request.iid, suffix)
      end

90 91 92 93
      # This method restores the SHAs that GitLab needs to create diffs by
      # recreating them as branches named as follows:
      #
      # gitlab/import/pull-request/N/{to,from}
94 95
      # Re-creates the source and target SHAs of each pull request as
      # temporary branches, then re-fetches the repository if any were created.
      #
      # @param pull_requests [Array] pull requests responding to #iid,
      #   #source_branch_sha and #target_branch_sha
      def restore_branches(pull_requests)
        # Two candidate branches per pull request: one for each side.
        shas_to_restore = pull_requests.flat_map do |pull_request|
          [
            TempBranch.new(temp_branch_name(pull_request, :from), pull_request.source_branch_sha),
            TempBranch.new(temp_branch_name(pull_request, :to), pull_request.target_branch_sha)
          ]
        end

        # Create the branches on the Bitbucket Server first
        created_branches = restore_branch_shas(shas_to_restore)

        # Remember them so #delete_temp_branches can clean up afterwards.
        @temp_branches += created_branches

        # Now sync the repository so we get the new branches
        import_repository unless created_branches.empty?
      end

112
      # Creates a branch on the Bitbucket Server for every entry whose SHA is
      # not already present in the local repository.
      #
      # @param shas_to_restore [Array<TempBranch>] candidate name/SHA pairs
      # @return [Array<TempBranch>] the entries whose branch was created;
      #   entries that hit a connection error are logged and left out
      def restore_branch_shas(shas_to_restore)
        shas_to_restore.each_with_object([]) do |temp_branch, branches_created|
          branch_name = temp_branch.name
          sha = temp_branch.sha

          # Nothing to recover when the commit is already available locally.
          next if sha_exists?(sha)

          begin
            client.create_branch(project_key, repository_slug, branch_name, sha)
            branches_created << temp_branch
          rescue BitbucketServer::Connection::ConnectionError => e
            log_warn(message: "Unable to recreate branch", sha: sha, error: e.message)
          end
        end
      end

      # Ensures the project repository exists and mirrors the remote
      # (project.import_url) into it using the class-level refmap.
      #
      # @raise [Gitlab::Shell::Error] re-raised after being logged
      def import_repository
        log_info(stage: 'import_repository', message: 'starting import')

        project.ensure_repository
        project.repository.fetch_as_mirror(project.import_url, refmap: self.class.refmap, remote_name: REMOTE_NAME)

        log_info(stage: 'import_repository', message: 'finished import')
      rescue Gitlab::Shell::Error => e
        log_error(stage: 'import_repository', message: 'failed import', error: e.message)

        # Expire cache to prevent scenarios such as:
        # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
        # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
        project.repository.expire_content_cache if project.repository_exists?

        raise
      end

146 147 148 149 150 151 152 153 154
      # Bitbucket Server keeps track of references for open pull requests in
      # refs/heads/pull-requests, but closed and merged requests get moved
      # into hidden internal refs under stash-refs/pull-requests. Unless the
      # SHAs involved are at the tip of a branch or tag, there is no way to
      # retrieve those commits from the server.
      #
      # To avoid losing history, we use the Bitbucket API to re-create the branch
      # on the remote server. Then we have to issue a `git fetch` to download these
      # branches.
155
      # Imports every pull request returned by the client, BATCH_SIZE at a
      # time. A failure while importing one pull request is logged and
      # appended to `errors`; the remaining pull requests are still processed.
      def import_pull_requests
        pull_requests = client.pull_requests(project_key, repository_slug).to_a

        # Creating branches on the server and fetching the newly-created branches
        # may take a number of network round-trips. Do this in batches so that we can
        # avoid doing a git fetch for every new branch.
        pull_requests.each_slice(BATCH_SIZE) do |batch|
          restore_branches(batch) if recover_missing_commits

          batch.each do |pull_request|
            import_bitbucket_pull_request(pull_request)
          rescue StandardError => e
            backtrace = Gitlab::Profiler.clean_backtrace(e.backtrace)
            log_error(stage: 'import_pull_requests', iid: pull_request.iid, error: e.message, backtrace: backtrace)

            errors << { type: :pull_request, iid: pull_request.iid, errors: e.message, backtrace: backtrace.join("\n"), raw_response: pull_request.raw }
          end
        end
      end

175
      # Removes every temporary branch recorded in @temp_branches, first on
      # the Bitbucket Server and then in the local repository. A connection
      # error for one branch is logged and appended to `errors`; that branch's
      # local deletion is skipped and the loop continues with the next branch.
      def delete_temp_branches
        @temp_branches.each do |branch|
          client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
          project.repository.delete_branch(branch.name)
        rescue BitbucketServer::Connection::ConnectionError => e
          log_error(stage: 'delete_temp_branches', branch: branch.name, error: e.message)
          errors << { type: :delete_temp_branches, branch_name: branch.name, errors: e.message }
        end
      end

185
      # Creates a merge request from one Bitbucket pull request and, when the
      # merge request was persisted, imports its comments.
      #
      # @param pull_request the pull request representation returned by the client
      def import_bitbucket_pull_request(pull_request)
        log_info(stage: 'import_bitbucket_pull_requests', message: 'starting', iid: pull_request.iid)

        # Prefix the description with an author line only when the author
        # cannot be matched to a user by email.
        description = ''
        description += @formatter.author_line(pull_request.author) unless find_user_id(pull_request.author_email)
        description += pull_request.description if pull_request.description
        author_id = gitlab_user_id(pull_request.author_email)

        attributes = {
          iid: pull_request.iid,
          title: pull_request.title,
          description: description,
          source_project_id: project.id,
          source_branch: Gitlab::Git.ref_name(pull_request.source_branch_name),
          source_branch_sha: pull_request.source_branch_sha,
          target_project_id: project.id,
          target_branch: Gitlab::Git.ref_name(pull_request.target_branch_name),
          target_branch_sha: pull_request.target_branch_sha,
          state: pull_request.state,
          state_id: MergeRequest.available_states[pull_request.state],
          author_id: author_id,
          assignee_id: nil,
          created_at: pull_request.created_at,
          updated_at: pull_request.updated_at
        }

        creator = Gitlab::Import::MergeRequestCreator.new(project)
        merge_request = creator.execute(attributes)

        import_pull_request_comments(pull_request, merge_request) if merge_request.persisted?

        log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
      end

219
      # Fetches the activities of a pull request and imports its merge event
      # (if any), its inline comments and its standalone comments.
      #
      # @param pull_request the source pull request (its iid is used for the lookup)
      # @param merge_request the merge request that receives the notes
      def import_pull_request_comments(pull_request, merge_request)
        log_info(stage: 'import_pull_request_comments', message: 'starting', iid: merge_request.iid)

        comments, other_activities = client.activities(project_key, repository_slug, pull_request.iid).partition(&:comment?)

        merge_event = other_activities.find(&:merge_event?)
        import_merge_event(merge_request, merge_event) if merge_event

        inline_comments, pr_comments = comments.partition(&:inline_comment?)

        import_inline_comments(inline_comments.map(&:comment), merge_request)
        import_standalone_pr_comments(pr_comments.map(&:comment), merge_request)

        log_info(stage: 'import_pull_request_comments', message: 'finished', iid: merge_request.iid,
                 merge_event_found: merge_event.present?,
                 inline_comments_count: inline_comments.count,
                 standalone_pr_comments: pr_comments.count)
      end

238
      # rubocop: disable CodeReuse/ActiveRecord
239
      # Records the merge commit SHA on the merge request and stores who
      # merged it, and when, in its MergeRequest::Metrics record.
      #
      # @param merge_request the merge request to update
      # @param merge_event the activity providing committer_email,
      #   merge_timestamp and merge_commit
      def import_merge_event(merge_request, merge_event)
        log_info(stage: 'import_merge_event', message: 'starting', iid: merge_request.iid)

        committer = merge_event.committer_email

        # Falls back to the project creator when the committer is unknown.
        user_id = gitlab_user_id(committer)
        timestamp = merge_event.merge_timestamp
        merge_request.update({ merge_commit_sha: merge_event.merge_commit })
        metric = MergeRequest::Metrics.find_or_initialize_by(merge_request: merge_request)
        metric.update(merged_by_id: user_id, merged_at: timestamp)

        log_info(stage: 'import_merge_event', message: 'finished', iid: merge_request.iid)
      end
252
      # rubocop: enable CodeReuse/ActiveRecord
253

254
      # Imports inline (diff) comments. Each top-level comment becomes a note;
      # its replies are attached to the same discussion, but only when the
      # parent note was persisted.
      #
      # @param inline_comments [Array] comments responding to #comments (replies)
      # @param merge_request the merge request that receives the notes
      def import_inline_comments(inline_comments, merge_request)
        log_info(stage: 'import_inline_comments', message: 'starting', iid: merge_request.iid)

        inline_comments.each do |comment|
          position = build_position(merge_request, comment)
          parent = create_diff_note(merge_request, comment, position)

          # Without a persisted parent there is no discussion to reply to.
          next unless parent&.persisted?

          discussion_id = parent.discussion_id

          comment.comments.each do |reply|
            create_diff_note(merge_request, reply, position, discussion_id)
          end
        end

        log_info(stage: 'import_inline_comments', message: 'finished', iid: merge_request.iid)
      end

273
      # Builds a DiffNote for the comment at the given position and saves it
      # when valid; otherwise falls back to a regular note that mentions the
      # position in its text.
      #
      # @param merge_request the merge request that receives the note
      # @param comment the source comment
      # @param position the diff position built by #build_position
      # @param discussion_id [String, nil] discussion to attach a reply to
      # @return the note, or nil when any StandardError was raised (the error
      #   is logged and appended to `errors`)
      def create_diff_note(merge_request, comment, position, discussion_id = nil)
        attributes = pull_request_comment_attributes(comment)
        attributes.merge!(position: position, type: 'DiffNote')
        attributes[:discussion_id] = discussion_id if discussion_id

        note = merge_request.notes.build(attributes)

        if note.valid?
          note.save
          return note
        end

        log_info(stage: 'create_diff_note', message: 'creating fallback DiffNote', iid: merge_request.iid)

        # Bitbucket Server supports the ability to comment on any line, not just the
        # line in the diff. If we can't add the note as a DiffNote, fallback to creating
        # a regular note.
        create_fallback_diff_note(merge_request, comment, position)
      rescue StandardError => e
        log_error(stage: 'create_diff_note', comment_id: comment.id, error: e.message)
        errors << { type: :pull_request, id: comment.id, errors: e.message }
        nil
      end

297
      # Creates a regular note whose text starts with a "*Comment on ...*"
      # header naming the old and/or new file position, followed by the
      # original comment text.
      #
      # @param merge_request the merge request that receives the note
      # @param comment the source comment
      # @param position an object responding to old_path/old_line/new_path/new_line
      # @return the result of `merge_request.notes.create!`
      def create_fallback_diff_note(merge_request, comment, position)
        attributes = pull_request_comment_attributes(comment)

        # Header pieces are joined with single spaces; each location is only
        # mentioned when its line number is present.
        header = ['*Comment on']
        header << "#{position.old_path}:#{position.old_line} -->" if position.old_line
        header << "#{position.new_path}:#{position.new_line}" if position.new_line

        attributes[:note] = "#{header.join(' ')}*\n\n#{comment.note}"
        merge_request.notes.create!(attributes)
      end

309 310 311 312 313 314 315 316 317 318 319 320 321 322
      # Builds the diff position of an inline comment. The comment's file path
      # is used for both the old and the new path.
      #
      # @param merge_request supplies diff_refs
      # @param pr_comment supplies file_path, old_pos and new_pos
      # @return [Gitlab::Diff::Position]
      def build_position(merge_request, pr_comment)
        Gitlab::Diff::Position.new({
          diff_refs: merge_request.diff_refs,
          old_path: pr_comment.file_path,
          new_path: pr_comment.file_path,
          old_line: pr_comment.old_pos,
          new_line: pr_comment.new_pos
        })
      end

      # Imports comments that are not attached to a diff line, each followed
      # by its replies. When a comment or one of its replies fails, the error
      # is logged and recorded under the top-level comment's id, the remaining
      # replies of that comment are skipped, and the next comment is processed.
      #
      # @param pr_comments [Array] comments responding to #comments (replies)
      # @param merge_request the merge request that receives the notes
      def import_standalone_pr_comments(pr_comments, merge_request)
        pr_comments.each do |comment|
          merge_request.notes.create!(pull_request_comment_attributes(comment))

          comment.comments.each do |reply|
            merge_request.notes.create!(pull_request_comment_attributes(reply))
          end
        rescue StandardError => e
          log_error(stage: 'import_standalone_pr_comments', merge_request_id: merge_request.id, comment_id: comment.id, error: e.message)
          errors << { type: :pull_request, comment_id: comment.id, errors: e.message }
        end
      end

      # Builds the attribute hash for a note created from a Bitbucket comment.
      # When the author's email matches no user, the project creator becomes
      # the author and a "*By <username> (<email>)*" line is prepended to the
      # text. Replies quote the first 80 characters of their parent comment.
      #
      # @param comment the source comment
      # @return [Hash] project, note, author_id, created_at and updated_at
      def pull_request_comment_attributes(comment)
        author = find_user_id(comment.author_email)
        note = ''

        unless author
          author = project.creator_id
          note = "*By #{comment.author_username} (#{comment.author_email})*\n\n"
        end

        note +=
          # Provide some context for replying
          if comment.parent_comment
            "> #{comment.parent_comment.note.truncate(80)}\n\n#{comment.note}"
          else
            comment.note
          end

        {
          project: project,
          note: note,
          author_id: author,
          created_at: comment.created_at,
          updated_at: comment.updated_at
        }
      end
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378

      # Logs at info level; `details` is merged over the base data from
      # #log_base_data, so its keys win on conflict.
      #
      # @param details [Hash]
      def log_info(details)
        payload = log_base_data.merge(details)
        logger.info(payload)
      end

      # Logs at error level; `details` is merged over the base data from
      # #log_base_data, so its keys win on conflict.
      #
      # @param details [Hash]
      def log_error(details)
        payload = log_base_data.merge(details)
        logger.error(payload)
      end

      # Logs at warn level; `details` is merged over the base data from
      # #log_base_data, so its keys win on conflict.
      #
      # @param details [Hash]
      def log_warn(details)
        payload = log_base_data.merge(details)
        logger.warn(payload)
      end

      # Fields included in every log entry written by this importer.
      #
      # @return [Hash] class name, project id and project full path
      def log_base_data
        base = { class: self.class.name }
        base[:project_id] = project.id
        base[:project_path] = project.full_path
        base
      end
379 380 381
    end
  end
end