Commit f8c613f1 authored by George Koltsov's avatar George Koltsov

Filter GitHub projects to import using GitHub Search API

  - Replace runtime GitHub project filtering with the GitHub Search API
    to improve the performance of returning filtered results and to
    allow pagination to be added in the future
parent 1099804b
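For orientation before the diff: when a filter is supplied, the new client code asks the GitHub Search API for matching repositories instead of fetching every repository and filtering at runtime. The standalone Ruby sketch below approximates how the search query is assembled; it is not the commit's code. `build_search_query`, "octocat", "acme/widgets", and "my-org" are made-up illustrations, while the query format mirrors `Gitlab::GithubImport::Client#search_query` further down.

# Simplified stand-in for the query assembly added in this commit.
# The filter term is matched against repository names ("in:name") across
# the user's own repos, repos they collaborate on, and their organizations.
def build_search_query(term, login:, collab_repos: [], orgs: [])
  parts = ["#{term} in:name is:public,private user:#{login}"]
  parts += collab_repos.map { |full_name| "repo:#{full_name}" }
  parts += orgs.map { |org| "org:#{org}" }
  parts.join(' ')
end

build_search_query('gitlab', login: 'octocat', collab_repos: ['acme/widgets'], orgs: ['my-org'])
# => "gitlab in:name is:public,private user:octocat repo:acme/widgets org:my-org"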
......@@ -7,11 +7,14 @@ import { visitUrl, objectToQuery } from '~/lib/utils/url_utility';
import { deprecatedCreateFlash as createFlash } from '~/flash';
import { s__, sprintf } from '~/locale';
import axios from '~/lib/utils/axios_utils';
import httpStatusCodes from '~/lib/utils/http_status';
import { capitalizeFirstCharacter } from '~/lib/utils/text_utility';
let eTagPoll;
const hasRedirectInError = e => e?.response?.data?.error?.redirect;
const redirectToUrlInError = e => visitUrl(e.response.data.error.redirect);
const tooManyRequests = e => e.response.status === httpStatusCodes.TOO_MANY_REQUESTS;
const pathWithParams = ({ path, ...params }) => {
const filteredParams = Object.fromEntries(
Object.entries(params).filter(([, value]) => value !== ''),
......@@ -71,6 +74,14 @@ const fetchReposFactory = ({ reposPath = isRequired() }) => ({ state, commit })
if (hasRedirectInError(e)) {
redirectToUrlInError(e);
} else if (tooManyRequests(e)) {
createFlash(
sprintf(s__('ImportProjects|%{provider} rate limit exceeded. Try again later'), {
provider: capitalizeFirstCharacter(provider),
}),
);
commit(types.RECEIVE_REPOS_ERROR);
} else {
createFlash(
sprintf(s__('ImportProjects|Requesting your %{provider} repositories failed'), {
......
......@@ -22,6 +22,7 @@ const httpStatusCodes = {
CONFLICT: 409,
GONE: 410,
UNPROCESSABLE_ENTITY: 422,
TOO_MANY_REQUESTS: 429,
INTERNAL_SERVER_ERROR: 500,
SERVICE_UNAVAILABLE: 503,
};
......
......@@ -15,6 +15,7 @@ class Import::GithubController < Import::BaseController
rescue_from OAuthConfigMissingError, with: :missing_oauth_config
rescue_from Octokit::Unauthorized, with: :provider_unauthorized
rescue_from Octokit::TooManyRequests, with: :provider_rate_limit
rescue_from Gitlab::GithubImport::RateLimitError, with: :rate_limit_threshold_exceeded
def new
if !ci_cd_only? && github_import_configured? && logged_in_with_provider?
......@@ -114,7 +115,7 @@ class Import::GithubController < Import::BaseController
def client_repos
@client_repos ||= if Feature.enabled?(:remove_legacy_github_client)
filtered(concatenated_repos)
concatenated_repos
else
filtered(client.repos)
end
......@@ -122,8 +123,15 @@ class Import::GithubController < Import::BaseController
def concatenated_repos
return [] unless client.respond_to?(:each_page)
return client.each_page(:repos).flat_map(&:objects) unless sanitized_filter_param
client.each_page(:repos).flat_map(&:objects)
client.search_repos_by_name(sanitized_filter_param).flat_map(&:objects).flat_map(&:items)
end
def sanitized_filter_param
super
@filter = @filter&.tr(' ', '')&.tr(':', '')
end
def oauth_client
......@@ -245,6 +253,10 @@ class Import::GithubController < Import::BaseController
def extra_import_params
{}
end
def rate_limit_threshold_exceeded
head :too_many_requests
end
end
Import::GithubController.prepend_if_ee('EE::Import::GithubController')
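The controller change above also sanitizes the user-supplied filter before handing it to the search client: spaces and colons are stripped so they cannot alter the search query qualifiers. A standalone approximation follows; the method name here is illustrative, and the real logic lives in `sanitized_filter_param` above.

# Approximation of Import::GithubController#sanitized_filter_param.
def sanitize_filter(filter)
  filter&.tr(' ', '')&.tr(':', '')
end

sanitize_filter(' test1:test2 test3 : test4 ') # => "test1test2test3test4"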
---
title: Filter GitHub projects to import using GitHub Search API
merge_request: 47002
author:
type: changed
......@@ -18,6 +18,8 @@ module Gitlab
attr_reader :octokit
SEARCH_MAX_REQUESTS_PER_MINUTE = 30
# A single page of data and the corresponding page number.
Page = Struct.new(:objects, :number)
......@@ -28,6 +30,7 @@ module Gitlab
# rate limit at once. The threshold is put in place to not hit the limit
# in most cases.
RATE_LIMIT_THRESHOLD = 50
SEARCH_RATE_LIMIT_THRESHOLD = 3
# token - The GitHub API token to use.
#
......@@ -152,8 +155,26 @@ module Gitlab
end
end
def search_repos_by_name(name)
each_page(:search_repositories, search_query(str: name, type: :name))
end
def search_query(str:, type:, include_collaborations: true, include_orgs: true)
query = "#{str} in:#{type} is:public,private user:#{octokit.user.login}"
query = [query, collaborations_subquery].join(' ') if include_collaborations
query = [query, organizations_subquery].join(' ') if include_orgs
query
end
# Returns `true` if we're still allowed to perform API calls.
# The Search API has a rate limit of 30 requests per minute, so a lower
# threshold is used when search requests are being made.
def requests_remaining?
if requests_limit == SEARCH_MAX_REQUESTS_PER_MINUTE
return remaining_requests > SEARCH_RATE_LIMIT_THRESHOLD
end
remaining_requests > RATE_LIMIT_THRESHOLD
end
......@@ -161,6 +182,10 @@ module Gitlab
octokit.rate_limit.remaining
end
def requests_limit
octokit.rate_limit.limit
end
def raise_or_wait_for_rate_limit
rate_limit_counter.increment
......@@ -221,6 +246,20 @@ module Gitlab
'The number of GitHub API calls performed when importing projects'
)
end
private
def collaborations_subquery
each_object(:repos, nil, { affiliation: 'collaborator' })
.map { |repo| "repo:#{repo.full_name}" }
.join(' ')
end
def organizations_subquery
each_object(:organizations)
.map { |org| "org:#{org.login}" }
.join(' ')
end
end
end
end
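The threshold logic added to the client above keeps the existing buffer of 50 calls against the regular REST limit, but switches to a buffer of 3 whenever the reported limit equals the Search API's 30 requests per minute. A standalone sketch of that check, with the constants copied from the diff and the Octokit rate-limit lookup replaced by plain arguments:

# Standalone sketch of the requests_remaining? check; limit/remaining would
# normally come from octokit.rate_limit.
SEARCH_MAX_REQUESTS_PER_MINUTE = 30
RATE_LIMIT_THRESHOLD = 50
SEARCH_RATE_LIMIT_THRESHOLD = 3

def requests_remaining?(limit, remaining)
  return remaining > SEARCH_RATE_LIMIT_THRESHOLD if limit == SEARCH_MAX_REQUESTS_PER_MINUTE

  remaining > RATE_LIMIT_THRESHOLD
end

requests_remaining?(30, 4)    # => true  (search limit, just above the lowered threshold)
requests_remaining?(30, 3)    # => false (stop and raise or wait instead of hitting the limit)
requests_remaining?(5000, 60) # => true  (regular REST limit with the default threshold)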
......@@ -14166,6 +14166,9 @@ msgstr ""
msgid "ImportButtons|Connect repositories from"
msgstr ""
msgid "ImportProjects|%{provider} rate limit exceeded. Try again later"
msgstr ""
msgid "ImportProjects|Blocked import URL: %{message}"
msgstr ""
......
......@@ -144,6 +144,58 @@ RSpec.describe Import::GithubController do
expect(json_response.dig('provider_repos', 0, 'id')).to eq(repo_1.id)
expect(json_response.dig('provider_repos', 1, 'id')).to eq(repo_2.id)
end
context 'when filtering' do
let(:filter) { 'test' }
let(:user_login) { 'user' }
let(:collaborations_subquery) { 'repo:repo1 repo:repo2' }
let(:organizations_subquery) { 'org:org1 org:org2' }
before do
allow_next_instance_of(Octokit::Client) do |client|
allow(client).to receive(:user).and_return(double(login: user_login))
end
end
it 'makes request to github search api' do
expected_query = "test in:name is:public,private user:#{user_login} #{collaborations_subquery} #{organizations_subquery}"
expect_next_instance_of(Gitlab::GithubImport::Client) do |client|
expect(client).to receive(:collaborations_subquery).and_return(collaborations_subquery)
expect(client).to receive(:organizations_subquery).and_return(organizations_subquery)
expect(client).to receive(:each_page).with(:search_repositories, expected_query).and_return([].to_enum)
end
get :status, params: { filter: filter }, format: :json
end
context 'when user input contains colons and spaces' do
before do
stub_client(search_repos_by_name: [])
end
it 'sanitizes user input' do
filter = ' test1:test2 test3 : test4 '
expected_filter = 'test1test2test3test4'
get :status, params: { filter: filter }, format: :json
expect(assigns(:filter)).to eq(expected_filter)
end
end
context 'when rate limit threshold is exceeded' do
before do
allow(controller).to receive(:status).and_raise(Gitlab::GithubImport::RateLimitError)
end
it 'returns 429' do
get :status, params: { filter: 'test' }, format: :json
expect(response).to have_gitlab_http_status(:too_many_requests)
end
end
end
end
end
......
......@@ -69,6 +69,7 @@ describe('import_projects store actions', () => {
importStatus: STATUSES.NONE,
},
],
provider: 'provider',
};
localState.getImportTarget = getImportTarget(localState);
......@@ -150,7 +151,28 @@ describe('import_projects store actions', () => {
);
});
describe('when /home/xanf/projects/gdk/gitlab/spec/frontend/import_projects/store/actions_spec.jsfiltered', () => {
describe('when rate limited', () => {
it('commits RECEIVE_REPOS_ERROR and shows rate limited error message', async () => {
mock.onGet(`${TEST_HOST}/endpoint.json?filter=filter`).reply(429);
await testAction(
fetchRepos,
null,
{ ...localState, filter: 'filter' },
[
{ type: SET_PAGE, payload: 1 },
{ type: REQUEST_REPOS },
{ type: SET_PAGE, payload: 0 },
{ type: RECEIVE_REPOS_ERROR },
],
[],
);
expect(createFlash).toHaveBeenCalledWith('Provider rate limit exceeded. Try again later');
});
});
describe('when filtered', () => {
it('fetches repos with filter applied', () => {
mock.onGet(`${TEST_HOST}/endpoint.json?filter=filter`).reply(200, payload);
......
......@@ -203,6 +203,11 @@ RSpec.describe Gitlab::GithubImport::Client do
describe '#requests_remaining?' do
let(:client) { described_class.new('foo') }
context 'when default requests limit is set' do
before do
allow(client).to receive(:requests_limit).and_return(5000)
end
it 'returns true if enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(9000)
......@@ -216,6 +221,25 @@ RSpec.describe Gitlab::GithubImport::Client do
end
end
context 'when search requests limit is set' do
before do
allow(client).to receive(:requests_limit).and_return(described_class::SEARCH_MAX_REQUESTS_PER_MINUTE)
end
it 'returns true if enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(described_class::SEARCH_RATE_LIMIT_THRESHOLD + 1)
expect(client.requests_remaining?).to eq(true)
end
it 'returns false if not enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(described_class::SEARCH_RATE_LIMIT_THRESHOLD - 1)
expect(client.requests_remaining?).to eq(false)
end
end
end
describe '#raise_or_wait_for_rate_limit' do
it 'raises RateLimitError when running in parallel mode' do
client = described_class.new('foo', parallel: true)
......@@ -262,6 +286,16 @@ RSpec.describe Gitlab::GithubImport::Client do
end
end
describe '#requests_limit' do
it 'returns requests limit' do
client = described_class.new('foo')
rate_limit = double(limit: 1)
expect(client.octokit).to receive(:rate_limit).and_return(rate_limit)
expect(client.requests_limit).to eq(1)
end
end
describe '#rate_limit_resets_in' do
it 'returns the number of seconds after which the rate limit is reset' do
client = described_class.new('foo')
......@@ -417,4 +451,61 @@ RSpec.describe Gitlab::GithubImport::Client do
expect(client.rate_limiting_enabled?).to eq(false)
end
end
describe 'search' do
let(:client) { described_class.new('foo') }
let(:user) { double(:user, login: 'user') }
let(:org1) { double(:org, login: 'org1') }
let(:org2) { double(:org, login: 'org2') }
let(:repo1) { double(:repo, full_name: 'repo1') }
let(:repo2) { double(:repo, full_name: 'repo2') }
before do
allow(client)
.to receive(:each_object)
.with(:repos, nil, { affiliation: 'collaborator' })
.and_return([repo1, repo2].to_enum)
allow(client)
.to receive(:each_object)
.with(:organizations)
.and_return([org1, org2].to_enum)
allow(client.octokit).to receive(:user).and_return(user)
end
describe '#search_repos_by_name' do
it 'searches for repositories based on name' do
expected_search_query = 'test in:name is:public,private user:user repo:repo1 repo:repo2 org:org1 org:org2'
expect(client).to receive(:each_page).with(:search_repositories, expected_search_query)
client.search_repos_by_name('test')
end
end
describe '#search_query' do
it 'returns base search query' do
result = client.search_query(str: 'test', type: :test, include_collaborations: false, include_orgs: false)
expect(result).to eq('test in:test is:public,private user:user')
end
context 'when include_collaborations is true' do
it 'returns search query including collaborations' do
result = client.search_query(str: 'test', type: :test, include_collaborations: true, include_orgs: false)
expect(result).to eq('test in:test is:public,private user:user repo:repo1 repo:repo2')
end
end
context 'when include_orgs is true' do
it 'returns search query including organizations' do
result = client.search_query(str: 'test', type: :test, include_collaborations: false, include_orgs: true)
expect(result).to eq('test in:test is:public,private user:user org:org1 org:org2')
end
end
end
end
end
......@@ -145,6 +145,8 @@ RSpec.shared_examples 'a GitHub-ish import controller: GET status' do
group.add_owner(user)
client = stub_client(repos: repos, orgs: [org], org_repos: [org_repo])
allow(client).to receive(:each_page).and_return([OpenStruct.new(objects: repos)].to_enum)
# The GitHub controller does its filtering via the GitHub Search API
stub_feature_flags(remove_legacy_github_client: false)
end
it 'filters list of repositories by name' do
......