Commit decb7bfb authored by Kassio Borges's avatar Kassio Borges Committed by Matthias Käppler

GithubImporter: Count and log each object imported

To provide a better feedback about what was imported and make it easier
to debug missing objects, not the FinishImportWorker will log the number
of objects imported, like:

```
{
  "message": "GitHub project import finished",
  "duration_s": 377.68,
  "objects_imported": {
    "github_importer_imported_pull_requests_merged_by": 92,
    "github_importer_imported_pull_request_reviews": 81,
    "github_importer_imported_diff_notes": 93,
    "github_importer_imported_pull_requests": 108,
    "github_importer_imported_notes": 794,
    "github_importer_imported_issues": 321
  },
  "import_source": "github",
  "project_id": 38,
  "import_stage": "Gitlab::GithubImport::Stage::FinishImportWorker"
}
```

Changelog: changed
parent ab49d4cb
...@@ -36,16 +36,29 @@ module Gitlab ...@@ -36,16 +36,29 @@ module Gitlab
importer_class.new(object, project, client).execute importer_class.new(object, project, client).execute
counter.increment increment_counters(project)
info(project.id, message: 'importer finished') info(project.id, message: 'importer finished')
rescue StandardError => e rescue StandardError => e
error(project.id, e, hash) error(project.id, e, hash)
end end
# Counters incremented:
# - global (prometheus): for metrics in Grafana
# - project (redis): used in FinishImportWorker to report number of objects imported
def increment_counters(project)
counter.increment
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :imported)
end
def counter def counter
@counter ||= Gitlab::Metrics.counter(counter_name, counter_description) @counter ||= Gitlab::Metrics.counter(counter_name, counter_description)
end end
def object_type
raise NotImplementedError
end
# Returns the representation class to use for the object. This class must # Returns the representation class to use for the object. This class must
# define the class method `from_json_hash`. # define the class method `from_json_hash`.
def representation_class def representation_class
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
Importer::DiffNoteImporter Importer::DiffNoteImporter
end end
def object_type
:diff_note
end
def counter_name def counter_name
:github_importer_imported_diff_notes :github_importer_imported_diff_notes
end end
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
Importer::IssueAndLabelLinksImporter Importer::IssueAndLabelLinksImporter
end end
def object_type
:issue
end
def counter_name def counter_name
:github_importer_imported_issues :github_importer_imported_issues
end end
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
Importer::LfsObjectImporter Importer::LfsObjectImporter
end end
def object_type
:lfs_object
end
def counter_name def counter_name
:github_importer_imported_lfs_objects :github_importer_imported_lfs_objects
end end
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
Importer::NoteImporter Importer::NoteImporter
end end
def object_type
:note
end
def counter_name def counter_name
:github_importer_imported_notes :github_importer_imported_notes
end end
......
...@@ -15,6 +15,10 @@ module Gitlab ...@@ -15,6 +15,10 @@ module Gitlab
Importer::PullRequestMergedByImporter Importer::PullRequestMergedByImporter
end end
def object_type
:pull_request_merged_by
end
def counter_name def counter_name
:github_importer_imported_pull_requests_merged_by :github_importer_imported_pull_requests_merged_by
end end
......
...@@ -15,6 +15,10 @@ module Gitlab ...@@ -15,6 +15,10 @@ module Gitlab
Importer::PullRequestReviewImporter Importer::PullRequestReviewImporter
end end
def object_type
:pull_request_review
end
def counter_name def counter_name
:github_importer_imported_pull_request_reviews :github_importer_imported_pull_request_reviews
end end
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
Importer::PullRequestImporter Importer::PullRequestImporter
end end
def object_type
:pull_request
end
def counter_name def counter_name
:github_importer_imported_pull_requests :github_importer_imported_pull_requests
end end
......
...@@ -29,7 +29,8 @@ module Gitlab ...@@ -29,7 +29,8 @@ module Gitlab
info( info(
project.id, project.id,
message: "GitHub project import finished", message: "GitHub project import finished",
duration_s: duration.round(2) duration_s: duration.round(2),
object_counts: ::Gitlab::GithubImport::ObjectCounter.summary(project)
) )
end end
......
...@@ -22,6 +22,10 @@ module Gitlab ...@@ -22,6 +22,10 @@ module Gitlab
:pull_requests_comments :pull_requests_comments
end end
def object_type
:diff_note
end
def id_for_already_imported_cache(note) def id_for_already_imported_cache(note)
note.id note.id
end end
......
...@@ -18,6 +18,10 @@ module Gitlab ...@@ -18,6 +18,10 @@ module Gitlab
ImportIssueWorker ImportIssueWorker
end end
def object_type
:issue
end
def collection_method def collection_method
:issues :issues
end end
......
...@@ -18,6 +18,10 @@ module Gitlab ...@@ -18,6 +18,10 @@ module Gitlab
ImportLfsObjectWorker ImportLfsObjectWorker
end end
def object_type
:lfs_object
end
def collection_method def collection_method
:lfs_objects :lfs_objects
end end
...@@ -26,6 +30,8 @@ module Gitlab ...@@ -26,6 +30,8 @@ module Gitlab
lfs_objects = Projects::LfsPointers::LfsObjectDownloadListService.new(project).execute lfs_objects = Projects::LfsPointers::LfsObjectDownloadListService.new(project).execute
lfs_objects.each do |object| lfs_objects.each do |object|
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
yield object yield object
end end
rescue StandardError => e rescue StandardError => e
......
...@@ -18,6 +18,10 @@ module Gitlab ...@@ -18,6 +18,10 @@ module Gitlab
ImportNoteWorker ImportNoteWorker
end end
def object_type
:note
end
def collection_method def collection_method
:issues_comments :issues_comments
end end
......
...@@ -22,6 +22,10 @@ module Gitlab ...@@ -22,6 +22,10 @@ module Gitlab
pr.number pr.number
end end
def object_type
:pull_request
end
def each_object_to_import def each_object_to_import
super do |pr| super do |pr|
update_repository if update_repository?(pr) update_repository if update_repository?(pr)
......
...@@ -22,6 +22,10 @@ module Gitlab ...@@ -22,6 +22,10 @@ module Gitlab
:pull_requests_merged_by :pull_requests_merged_by
end end
def object_type
:pull_request_merged_by
end
def id_for_already_imported_cache(merge_request) def id_for_already_imported_cache(merge_request)
merge_request.id merge_request.id
end end
...@@ -30,6 +34,8 @@ module Gitlab ...@@ -30,6 +34,8 @@ module Gitlab
project.merge_requests.with_state(:merged).find_each do |merge_request| project.merge_requests.with_state(:merged).find_each do |merge_request|
next if already_imported?(merge_request) next if already_imported?(merge_request)
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
pull_request = client.pull_request(project.import_source, merge_request.iid) pull_request = client.pull_request(project.import_source, merge_request.iid)
yield(pull_request) yield(pull_request)
......
...@@ -29,6 +29,10 @@ module Gitlab ...@@ -29,6 +29,10 @@ module Gitlab
:pull_request_reviews :pull_request_reviews
end end
def object_type
:pull_request_review
end
def id_for_already_imported_cache(review) def id_for_already_imported_cache(review)
review.id review.id
end end
...@@ -57,6 +61,8 @@ module Gitlab ...@@ -57,6 +61,8 @@ module Gitlab
project.merge_requests.find_each do |merge_request| project.merge_requests.find_each do |merge_request|
next if already_imported?(merge_request) next if already_imported?(merge_request)
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
client client
.pull_request_reviews(project.import_source, merge_request.iid) .pull_request_reviews(project.import_source, merge_request.iid)
.each do |review| .each do |review|
...@@ -81,6 +87,8 @@ module Gitlab ...@@ -81,6 +87,8 @@ module Gitlab
page.objects.each do |review| page.objects.each do |review|
next if already_imported?(review) next if already_imported?(review)
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
review.merge_request_id = merge_request.id review.merge_request_id = merge_request.id
yield(review) yield(review)
......
# frozen_string_literal: true
# Count objects fetched or imported from Github in the context of the
# project being imported.
module Gitlab
module GithubImport
class ObjectCounter
OPERATIONS = %w[fetched imported].freeze
COUNTER_LIST_KEY = 'github-importer/object-counters-list/%{project}/%{operation}'
COUNTER_KEY = 'github-importer/object-counter/%{project}/%{operation}/%{object_type}'
CACHING = Gitlab::Cache::Import::Caching
class << self
def increment(project, object_type, operation)
validate_operation!(operation)
counter_key = COUNTER_KEY % { project: project.id, operation: operation, object_type: object_type }
add_counter_to_list(project, operation, counter_key)
CACHING.increment(counter_key)
end
def summary(project)
OPERATIONS.each_with_object({}) do |operation, result|
result[operation] = {}
CACHING
.values_from_set(counter_list_key(project, operation))
.sort
.each do |counter|
object_type = counter.split('/').last
result[operation][object_type] = CACHING.read_integer(counter)
end
end
end
private
def add_counter_to_list(project, operation, key)
CACHING.set_add(counter_list_key(project, operation), key)
end
def counter_list_key(project, operation)
COUNTER_LIST_KEY % { project: project.id, operation: operation }
end
def validate_operation!(operation)
unless operation.to_s.presence_in(OPERATIONS)
raise ArgumentError, "Operation must be #{OPERATIONS.join(' or ')}"
end
end
end
end
end
end
...@@ -103,6 +103,8 @@ module Gitlab ...@@ -103,6 +103,8 @@ module Gitlab
page.objects.each do |object| page.objects.each do |object|
next if already_imported?(object) next if already_imported?(object)
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
yield object yield object
# We mark the object as imported immediately so we don't end up # We mark the object as imported immediately so we don't end up
...@@ -129,6 +131,10 @@ module Gitlab ...@@ -129,6 +131,10 @@ module Gitlab
Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, id) Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, id)
end end
def object_type
raise NotImplementedError
end
# Returns the ID to use for the cache used for checking if an object has # Returns the ID to use for the cache used for checking if an object has
# already been imported or not. # already been imported or not.
# #
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do
let_it_be(:project) { create(:project) }
it 'validates the operation being incremented' do
expect { described_class.increment(project, :issue, :unknown) }
.to raise_error(ArgumentError, 'Operation must be fetched or imported')
end
it 'increments the counter and saves the key to be listed in the summary later' do
described_class.increment(project, :issue, :fetched)
described_class.increment(project, :issue, :fetched)
described_class.increment(project, :issue, :imported)
described_class.increment(project, :issue, :imported)
expect(described_class.summary(project)).to eq({
'fetched' => { 'issue' => 2 },
'imported' => { 'issue' => 2 }
})
end
end
...@@ -11,6 +11,10 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do ...@@ -11,6 +11,10 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do
Class Class
end end
def object_type
:dummy
end
def collection_method def collection_method
:issues :issues
end end
......
...@@ -4,7 +4,7 @@ require 'spec_helper' ...@@ -4,7 +4,7 @@ require 'spec_helper'
RSpec.describe Gitlab::GithubImport do RSpec.describe Gitlab::GithubImport do
context 'github.com' do context 'github.com' do
let(:project) { double(:project, import_url: 'http://t0ken@github.com/user/repo.git') } let(:project) { double(:project, import_url: 'http://t0ken@github.com/user/repo.git', id: 1) }
it 'returns a new Client with a custom token' do it 'returns a new Client with a custom token' do
expect(described_class::Client) expect(described_class::Client)
......
...@@ -19,6 +19,10 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -19,6 +19,10 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
'This is a counter' 'This is a counter'
end end
def object_type
:dummy
end
def representation_class def representation_class
MockRepresantation MockRepresantation
end end
...@@ -42,7 +46,7 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -42,7 +46,7 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
end) end)
end end
describe '#import' do describe '#import', :clean_gitlab_redis_shared_state do
let(:importer_class) { double(:importer_class, name: 'klass_name') } let(:importer_class) { double(:importer_class, name: 'klass_name') }
let(:importer_instance) { double(:importer_instance) } let(:importer_instance) { double(:importer_instance) }
let(:project) { double(:project, full_path: 'foo/bar', id: 1) } let(:project) { double(:project, full_path: 'foo/bar', id: 1) }
...@@ -90,6 +94,11 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -90,6 +94,11 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
end end
worker.import(project, client, { 'number' => 10, 'github_id' => 1 }) worker.import(project, client, { 'number' => 10, 'github_id' => 1 })
expect(Gitlab::GithubImport::ObjectCounter.summary(project)).to eq({
'fetched' => {},
'imported' => { 'dummy' => 1 }
})
end end
it 'logs error when the import fails' do it 'logs error when the import fails' do
......
...@@ -33,6 +33,10 @@ RSpec.describe Gitlab::GithubImport::Stage::FinishImportWorker do ...@@ -33,6 +33,10 @@ RSpec.describe Gitlab::GithubImport::Stage::FinishImportWorker do
message: 'GitHub project import finished', message: 'GitHub project import finished',
import_stage: 'Gitlab::GithubImport::Stage::FinishImportWorker', import_stage: 'Gitlab::GithubImport::Stage::FinishImportWorker',
import_source: :github, import_source: :github,
object_counts: {
'fetched' => {},
'imported' => {}
},
project_id: project.id, project_id: project.id,
duration_s: a_kind_of(Numeric) duration_s: a_kind_of(Numeric)
) )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment