Commit 54ef620c authored by Jose Ivan Vargas's avatar Jose Ivan Vargas

Merge branch 'georgekoltsov/use-github-api-for-filtering' into 'master'

Filter GitHub projects to import using GitHub Search API

See merge request gitlab-org/gitlab!47002
parents 932ca4d1 f8c613f1
......@@ -7,11 +7,14 @@ import { visitUrl, objectToQuery } from '~/lib/utils/url_utility';
import { deprecatedCreateFlash as createFlash } from '~/flash';
import { s__, sprintf } from '~/locale';
import axios from '~/lib/utils/axios_utils';
import httpStatusCodes from '~/lib/utils/http_status';
import { capitalizeFirstCharacter } from '~/lib/utils/text_utility';
let eTagPoll;
const hasRedirectInError = e => e?.response?.data?.error?.redirect;
const redirectToUrlInError = e => visitUrl(e.response.data.error.redirect);
const tooManyRequests = e => e.response.status === httpStatusCodes.TOO_MANY_REQUESTS;
const pathWithParams = ({ path, ...params }) => {
const filteredParams = Object.fromEntries(
Object.entries(params).filter(([, value]) => value !== ''),
......@@ -71,6 +74,14 @@ const fetchReposFactory = ({ reposPath = isRequired() }) => ({ state, commit })
if (hasRedirectInError(e)) {
redirectToUrlInError(e);
} else if (tooManyRequests(e)) {
createFlash(
sprintf(s__('ImportProjects|%{provider} rate limit exceeded. Try again later'), {
provider: capitalizeFirstCharacter(provider),
}),
);
commit(types.RECEIVE_REPOS_ERROR);
} else {
createFlash(
sprintf(s__('ImportProjects|Requesting your %{provider} repositories failed'), {
......
......@@ -22,6 +22,7 @@ const httpStatusCodes = {
CONFLICT: 409,
GONE: 410,
UNPROCESSABLE_ENTITY: 422,
TOO_MANY_REQUESTS: 429,
INTERNAL_SERVER_ERROR: 500,
SERVICE_UNAVAILABLE: 503,
};
......
......@@ -15,6 +15,7 @@ class Import::GithubController < Import::BaseController
rescue_from OAuthConfigMissingError, with: :missing_oauth_config
rescue_from Octokit::Unauthorized, with: :provider_unauthorized
rescue_from Octokit::TooManyRequests, with: :provider_rate_limit
rescue_from Gitlab::GithubImport::RateLimitError, with: :rate_limit_threshold_exceeded
def new
if !ci_cd_only? && github_import_configured? && logged_in_with_provider?
......@@ -114,7 +115,7 @@ class Import::GithubController < Import::BaseController
def client_repos
@client_repos ||= if Feature.enabled?(:remove_legacy_github_client)
filtered(concatenated_repos)
concatenated_repos
else
filtered(client.repos)
end
......@@ -122,8 +123,15 @@ class Import::GithubController < Import::BaseController
def concatenated_repos
return [] unless client.respond_to?(:each_page)
return client.each_page(:repos).flat_map(&:objects) unless sanitized_filter_param
client.each_page(:repos).flat_map(&:objects)
client.search_repos_by_name(sanitized_filter_param).flat_map(&:objects).flat_map(&:items)
end
def sanitized_filter_param
super
@filter = @filter&.tr(' ', '')&.tr(':', '')
end
def oauth_client
......@@ -245,6 +253,10 @@ class Import::GithubController < Import::BaseController
def extra_import_params
{}
end
def rate_limit_threshold_exceeded
head :too_many_requests
end
end
Import::GithubController.prepend_if_ee('EE::Import::GithubController')
---
title: Filter GitHub projects to import using GitHub Search API
merge_request: 47002
author:
type: changed
......@@ -18,6 +18,8 @@ module Gitlab
attr_reader :octokit
SEARCH_MAX_REQUESTS_PER_MINUTE = 30
# A single page of data and the corresponding page number.
Page = Struct.new(:objects, :number)
......@@ -28,6 +30,7 @@ module Gitlab
# rate limit at once. The threshold is put in place to not hit the limit
# in most cases.
RATE_LIMIT_THRESHOLD = 50
SEARCH_RATE_LIMIT_THRESHOLD = 3
# token - The GitHub API token to use.
#
......@@ -152,8 +155,26 @@ module Gitlab
end
end
def search_repos_by_name(name)
each_page(:search_repositories, search_query(str: name, type: :name))
end
def search_query(str:, type:, include_collaborations: true, include_orgs: true)
query = "#{str} in:#{type} is:public,private user:#{octokit.user.login}"
query = [query, collaborations_subquery].join(' ') if include_collaborations
query = [query, organizations_subquery].join(' ') if include_orgs
query
end
# Returns `true` if we're still allowed to perform API calls.
# Search API has rate limit of 30, use lowered threshold when search is used.
def requests_remaining?
if requests_limit == SEARCH_MAX_REQUESTS_PER_MINUTE
return remaining_requests > SEARCH_RATE_LIMIT_THRESHOLD
end
remaining_requests > RATE_LIMIT_THRESHOLD
end
......@@ -161,6 +182,10 @@ module Gitlab
octokit.rate_limit.remaining
end
def requests_limit
octokit.rate_limit.limit
end
def raise_or_wait_for_rate_limit
rate_limit_counter.increment
......@@ -221,6 +246,20 @@ module Gitlab
'The number of GitHub API calls performed when importing projects'
)
end
private
def collaborations_subquery
each_object(:repos, nil, { affiliation: 'collaborator' })
.map { |repo| "repo:#{repo.full_name}" }
.join(' ')
end
def organizations_subquery
each_object(:organizations)
.map { |org| "org:#{org.login}" }
.join(' ')
end
end
end
end
......@@ -14157,6 +14157,9 @@ msgstr ""
msgid "ImportButtons|Connect repositories from"
msgstr ""
msgid "ImportProjects|%{provider} rate limit exceeded. Try again later"
msgstr ""
msgid "ImportProjects|Blocked import URL: %{message}"
msgstr ""
......
......@@ -144,6 +144,58 @@ RSpec.describe Import::GithubController do
expect(json_response.dig('provider_repos', 0, 'id')).to eq(repo_1.id)
expect(json_response.dig('provider_repos', 1, 'id')).to eq(repo_2.id)
end
context 'when filtering' do
let(:filter) { 'test' }
let(:user_login) { 'user' }
let(:collaborations_subquery) { 'repo:repo1 repo:repo2' }
let(:organizations_subquery) { 'org:org1 org:org2' }
before do
allow_next_instance_of(Octokit::Client) do |client|
allow(client).to receive(:user).and_return(double(login: user_login))
end
end
it 'makes request to github search api' do
expected_query = "test in:name is:public,private user:#{user_login} #{collaborations_subquery} #{organizations_subquery}"
expect_next_instance_of(Gitlab::GithubImport::Client) do |client|
expect(client).to receive(:collaborations_subquery).and_return(collaborations_subquery)
expect(client).to receive(:organizations_subquery).and_return(organizations_subquery)
expect(client).to receive(:each_page).with(:search_repositories, expected_query).and_return([].to_enum)
end
get :status, params: { filter: filter }, format: :json
end
context 'when user input contains colons and spaces' do
before do
stub_client(search_repos_by_name: [])
end
it 'sanitizes user input' do
filter = ' test1:test2 test3 : test4 '
expected_filter = 'test1test2test3test4'
get :status, params: { filter: filter }, format: :json
expect(assigns(:filter)).to eq(expected_filter)
end
end
context 'when rate limit threshold is exceeded' do
before do
allow(controller).to receive(:status).and_raise(Gitlab::GithubImport::RateLimitError)
end
it 'returns 429' do
get :status, params: { filter: 'test' }, format: :json
expect(response).to have_gitlab_http_status(:too_many_requests)
end
end
end
end
end
......
......@@ -69,6 +69,7 @@ describe('import_projects store actions', () => {
importStatus: STATUSES.NONE,
},
],
provider: 'provider',
};
localState.getImportTarget = getImportTarget(localState);
......@@ -150,7 +151,28 @@ describe('import_projects store actions', () => {
);
});
describe('when /home/xanf/projects/gdk/gitlab/spec/frontend/import_projects/store/actions_spec.jsfiltered', () => {
describe('when rate limited', () => {
it('commits RECEIVE_REPOS_ERROR and shows rate limited error message', async () => {
mock.onGet(`${TEST_HOST}/endpoint.json?filter=filter`).reply(429);
await testAction(
fetchRepos,
null,
{ ...localState, filter: 'filter' },
[
{ type: SET_PAGE, payload: 1 },
{ type: REQUEST_REPOS },
{ type: SET_PAGE, payload: 0 },
{ type: RECEIVE_REPOS_ERROR },
],
[],
);
expect(createFlash).toHaveBeenCalledWith('Provider rate limit exceeded. Try again later');
});
});
describe('when filtered', () => {
it('fetches repos with filter applied', () => {
mock.onGet(`${TEST_HOST}/endpoint.json?filter=filter`).reply(200, payload);
......
......@@ -203,6 +203,11 @@ RSpec.describe Gitlab::GithubImport::Client do
describe '#requests_remaining?' do
let(:client) { described_class.new('foo') }
context 'when default requests limit is set' do
before do
allow(client).to receive(:requests_limit).and_return(5000)
end
it 'returns true if enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(9000)
......@@ -216,6 +221,25 @@ RSpec.describe Gitlab::GithubImport::Client do
end
end
context 'when search requests limit is set' do
before do
allow(client).to receive(:requests_limit).and_return(described_class::SEARCH_MAX_REQUESTS_PER_MINUTE)
end
it 'returns true if enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(described_class::SEARCH_RATE_LIMIT_THRESHOLD + 1)
expect(client.requests_remaining?).to eq(true)
end
it 'returns false if not enough requests remain' do
expect(client).to receive(:remaining_requests).and_return(described_class::SEARCH_RATE_LIMIT_THRESHOLD - 1)
expect(client.requests_remaining?).to eq(false)
end
end
end
describe '#raise_or_wait_for_rate_limit' do
it 'raises RateLimitError when running in parallel mode' do
client = described_class.new('foo', parallel: true)
......@@ -262,6 +286,16 @@ RSpec.describe Gitlab::GithubImport::Client do
end
end
describe '#requests_limit' do
it 'returns requests limit' do
client = described_class.new('foo')
rate_limit = double(limit: 1)
expect(client.octokit).to receive(:rate_limit).and_return(rate_limit)
expect(client.requests_limit).to eq(1)
end
end
describe '#rate_limit_resets_in' do
it 'returns the number of seconds after which the rate limit is reset' do
client = described_class.new('foo')
......@@ -417,4 +451,61 @@ RSpec.describe Gitlab::GithubImport::Client do
expect(client.rate_limiting_enabled?).to eq(false)
end
end
describe 'search' do
let(:client) { described_class.new('foo') }
let(:user) { double(:user, login: 'user') }
let(:org1) { double(:org, login: 'org1') }
let(:org2) { double(:org, login: 'org2') }
let(:repo1) { double(:repo, full_name: 'repo1') }
let(:repo2) { double(:repo, full_name: 'repo2') }
before do
allow(client)
.to receive(:each_object)
.with(:repos, nil, { affiliation: 'collaborator' })
.and_return([repo1, repo2].to_enum)
allow(client)
.to receive(:each_object)
.with(:organizations)
.and_return([org1, org2].to_enum)
allow(client.octokit).to receive(:user).and_return(user)
end
describe '#search_repos_by_name' do
it 'searches for repositories based on name' do
expected_search_query = 'test in:name is:public,private user:user repo:repo1 repo:repo2 org:org1 org:org2'
expect(client).to receive(:each_page).with(:search_repositories, expected_search_query)
client.search_repos_by_name('test')
end
end
describe '#search_query' do
it 'returns base search query' do
result = client.search_query(str: 'test', type: :test, include_collaborations: false, include_orgs: false)
expect(result).to eq('test in:test is:public,private user:user')
end
context 'when include_collaborations is true' do
it 'returns search query including users' do
result = client.search_query(str: 'test', type: :test, include_collaborations: true, include_orgs: false)
expect(result).to eq('test in:test is:public,private user:user repo:repo1 repo:repo2')
end
end
context 'when include_orgs is true' do
it 'returns search query including users' do
result = client.search_query(str: 'test', type: :test, include_collaborations: false, include_orgs: true)
expect(result).to eq('test in:test is:public,private user:user org:org1 org:org2')
end
end
end
end
end
......@@ -145,6 +145,8 @@ RSpec.shared_examples 'a GitHub-ish import controller: GET status' do
group.add_owner(user)
client = stub_client(repos: repos, orgs: [org], org_repos: [org_repo])
allow(client).to receive(:each_page).and_return([OpenStruct.new(objects: repos)].to_enum)
# GitHub controller has filtering done using GitHub Search API
stub_feature_flags(remove_legacy_github_client: false)
end
it 'filters list of repositories by name' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment