Commit c428b409 authored by Lin Jen-Shin's avatar Lin Jen-Shin

Merge branch...

Merge branch '32222-use-artifact-relative-urls-to-fetch-knapsack-and-flaky-tests-metadata-take-2' into 'master'

Resolve "Use artifact relative URLs to fetch Knapsack and Flaky tests metadata"

See merge request gitlab-org/gitlab!48461
parents 22f5722e f0a3686a
......@@ -38,7 +38,7 @@ review-build-cng:
- BUILD_TRIGGER_TOKEN=$REVIEW_APPS_BUILD_TRIGGER_TOKEN ./scripts/trigger-build cng
# When the job is manual, review-deploy is also manual and we don't want people
# to have to manually start the jobs in sequence, so we do it for them.
- '[ -z $CI_JOB_MANUAL ] || play_job "review-deploy"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-deploy"'
.review-workflow-base:
extends:
......@@ -78,8 +78,8 @@ review-deploy:
- disable_sign_ups || (delete_release && exit 1)
# When the job is manual, review-qa-smoke is also manual and we don't want people
# to have to manually start the jobs in sequence, so we do it for them.
- '[ -z $CI_JOB_MANUAL ] || play_job "review-qa-smoke"'
- '[ -z $CI_JOB_MANUAL ] || play_job "review-performance"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-qa-smoke"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-performance"'
after_script:
# Run seed-dast-test-data.sh only when DAST_RUN is set to true. This is to populate the review app with data for the DAST scan.
# Set DAST_RUN to true when jobs are manually scheduled.
......
.tests-metadata-state:
variables:
TESTS_METADATA_S3_BUCKET: "gitlab-ce-cache"
image: ruby:2.7
before_script:
- source scripts/utils.sh
artifacts:
......@@ -17,7 +16,8 @@ retrieve-tests-metadata:
- .test-metadata:rules:retrieve-tests-metadata
stage: prepare
script:
- source scripts/rspec_helpers.sh
- install_gitlab_gem
- source ./scripts/rspec_helpers.sh
- retrieve_tests_metadata
update-tests-metadata:
......
......@@ -12,8 +12,8 @@ Our current CI parallelization setup is as follows:
1. The `retrieve-tests-metadata` job in the `prepare` stage ensures we have a
`knapsack/report-master.json` file:
- The `knapsack/report-master.json` file is fetched from S3, if it's not here
we initialize the file with `{}`.
- The `knapsack/report-master.json` file is fetched from the latest `master` pipeline which runs `update-tests-metadata`
(for now it's the 2-hourly scheduled master pipeline), if it's not here we initialize the file with `{}`.
1. Each `[rspec|rspec-ee] [unit|integration|system|geo] n m` job is run with
`knapsack rspec` and should have an evenly distributed share of tests:
- It works because the jobs have access to the `knapsack/report-master.json`
......@@ -25,7 +25,7 @@ Our current CI parallelization setup is as follows:
1. The `update-tests-metadata` job (which only runs on scheduled pipelines for
[the canonical project](https://gitlab.com/gitlab-org/gitlab)) takes all the
`knapsack/rspec*_pg_*.json` files and merges them all together into a single
`knapsack/report-master.json` file that is then uploaded to S3.
`knapsack/report-master.json` file that is saved as artifact.
After that, the next pipeline will use the up-to-date `knapsack/report-master.json` file.
......
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
require_relative 'get_job_id'
# Cancels one pipeline of a project through the `Gitlab` API client.
class CancelPipeline
  # Fallback option values, read from the CI job environment.
  DEFAULT_OPTIONS = {
    project: ENV['CI_PROJECT_ID'],
    pipeline_id: ENV['CI_PIPELINE_ID'],
    api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
  }.freeze

  # @param options [Hash] accepts :project, :pipeline_id and :api_token; the
  #   three keys are removed from the hash that is passed in.
  def initialize(options)
    @project, @pipeline_id, token =
      %i[project pipeline_id api_token].map { |key| options.delete(key) }

    Gitlab.configure do |client|
      client.endpoint = 'https://gitlab.com/api/v4'
      client.private_token = token
    end
  end

  # Asks the API client to cancel the configured pipeline.
  #
  # @return whatever `Gitlab.cancel_pipeline` returns
  def execute
    Gitlab.cancel_pipeline(project, pipeline_id)
  end

  private

  attr_reader :project, :pipeline_id
end
# Command-line entry point: runs only when this file is executed directly, so
# that requiring the file just defines CancelPipeline.
if $PROGRAM_NAME == __FILE__
  # Start from the environment-derived defaults; flags below override them.
  options = CancelPipeline::DEFAULT_OPTIONS.dup

  OptionParser.new do |opts|
    opts.on("-p", "--project PROJECT", String, "Project where to find the pipeline (defaults to $CI_PROJECT_ID)") do |value|
      options[:project] = value
    end

    opts.on("-i", "--pipeline-id PIPELINE_ID", String, "A pipeline ID (defaults to $CI_PIPELINE_ID)") do |value|
      options[:pipeline_id] = value
    end

    # Cancelling a pipeline is a write operation, so a read-only token is not enough.
    opts.on("-t", "--api-token API_TOKEN", String, "A valid API token with the `api` scope") do |value|
      options[:api_token] = value
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end.parse!

  CancelPipeline.new(options).execute
end
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'optparse'
require 'fileutils'
require 'uri'
require 'cgi'
require 'net/http'
# Downloads the artifacts of a CI job (the whole archive, or a single file from
# it) from the gitlab.com REST API and writes them to the local disk.
class ArtifactFinder
  # Base URL of the API; the API token is only ever sent to this host.
  API_ENDPOINT = 'https://gitlab.com/api/v4'

  # File name used when the whole artifacts archive is downloaded.
  DEFAULT_ARCHIVE_NAME = 'artifacts.zip'

  # Fallback option values, read from the CI job environment.
  DEFAULT_OPTIONS = {
    project: ENV['CI_PROJECT_ID'],
    api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
  }.freeze

  # @param options [Hash] accepts :project (ID or path), :job_id, :api_token and
  #   :artifact_path (optional, path of a single file inside the artifacts);
  #   these keys are removed from the hash that is passed in.
  def initialize(options)
    @project = options.delete(:project)
    @job_id = options.delete(:job_id)
    @api_token = options.delete(:api_token)
    @artifact_path = options.delete(:artifact_path)

    warn "No API token given." unless api_token
  end

  # Downloads the artifact. With an artifact path, the file is written to that
  # same relative path locally (parent directories are created); otherwise the
  # archive is written to `artifacts.zip`.
  #
  # @return [Net::HTTPResponse] the response of the request
  # @raise [RuntimeError] on a non-success response or too many redirects
  def execute
    # `to_s` lets a numeric project ID be given as an Integer.
    url = "#{API_ENDPOINT}/projects/#{CGI.escape(project.to_s)}/jobs/#{job_id}/artifacts"

    if artifact_path
      FileUtils.mkdir_p(File.dirname(artifact_path))
      url += "/#{artifact_path}"
    end

    fetch(url)
  end

  private

  attr_reader :project, :job_id, :api_token, :artifact_path

  # Performs a GET on `uri_str`, streams a successful body to disk and follows
  # up to `limit` redirects.
  def fetch(uri_str, limit = 10)
    raise 'Too many HTTP redirects' if limit == 0

    uri = URI(uri_str)
    request = Net::HTTP::Get.new(uri)
    # Redirects can point at another host (e.g. object storage); never forward
    # the private token outside of the API host.
    request['Private-Token'] = api_token if api_token && uri.host == URI(API_ENDPOINT).host

    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(request) do |response|
        case response
        when Net::HTTPSuccess
          # Binary mode: artifacts may be archives or other non-text files.
          File.open(artifact_path || DEFAULT_ARCHIVE_NAME, 'wb') do |file|
            response.read_body { |chunk| file.write(chunk) }
          end
        when Net::HTTPRedirection
          # `Location` may be relative; resolve it against the current URL.
          location = URI.join(uri_str, response['location']).to_s
          warn "Redirected (#{limit - 1} redirections remaining)."
          fetch(location, limit - 1)
        else
          # `response.value` raises by itself on non-2xx responses, so report
          # the status explicitly instead of interpolating it.
          raise "Unexpected response: #{response.code} #{response.message}"
        end
      end
    end
  end
end
# Command-line entry point: runs only when this file is executed directly, so
# that requiring the file just defines ArtifactFinder.
if $PROGRAM_NAME == __FILE__
  # Start from the environment-derived defaults; flags below override them.
  options = ArtifactFinder::DEFAULT_OPTIONS.dup

  OptionParser.new do |opts|
    opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
      options[:project] = value
    end

    opts.on("-j", "--job-id JOB_ID", String, "A job ID") do |value|
      options[:job_id] = value
    end

    opts.on("-a", "--artifact-path ARTIFACT_PATH", String, "A valid artifact path") do |value|
      options[:artifact_path] = value
    end

    opts.on("-t", "--api-token API_TOKEN", String, "A valid API token with the `read_api` scope") do |value|
      options[:api_token] = value
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end.parse!

  ArtifactFinder.new(options).execute
end
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
require 'cgi'
# Finds a CI job by name through the `Gitlab` API client, either among the
# pipelines matching a query or inside one given pipeline.
class JobFinder
  # Fallback option values, read from the CI job environment.
  DEFAULT_OPTIONS = {
    project: ENV['CI_PROJECT_ID'],
    pipeline_id: ENV['CI_PIPELINE_ID'],
    pipeline_query: {},
    job_query: {},
    api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
  }.freeze

  # @param options [Hash] accepts :project, :pipeline_id, :pipeline_query,
  #   :job_query, :job_name and :api_token; these keys are removed from the
  #   hash that is passed in. Both query hashes are optional.
  def initialize(options)
    @project = options.delete(:project)
    # Callers that build their own options hash may leave the queries out;
    # treat a missing query as "no filter" instead of failing later on nil.
    @pipeline_query = options.delete(:pipeline_query) || {}
    @job_query = options.delete(:job_query) || {}
    @pipeline_id = options.delete(:pipeline_id)
    @job_name = options.delete(:job_name)
    @api_token = options.delete(:api_token)

    Gitlab.configure do |config|
      config.endpoint = 'https://gitlab.com/api/v4'
      config.private_token = api_token if api_token
    end

    warn "No API token given." unless api_token
  end

  # Looks the job up in the filtered pipelines when a pipeline query was given,
  # otherwise in the pipeline identified by :pipeline_id.
  #
  # @return the matching job, or nil when neither a pipeline query nor a
  #   pipeline ID is available
  # @raise [RuntimeError] 'Job not found!' when a search ran without a match
  def execute
    find_job_with_filtered_pipelines || find_job_in_pipeline
  end

  private

  attr_reader :project, :pipeline_query, :job_query, :pipeline_id, :job_name, :api_token

  # Walks every pipeline returned for the pipeline query and returns the first
  # job with the requested name.
  def find_job_with_filtered_pipelines
    return if pipeline_query.empty?

    Gitlab.get(
      "/projects/#{CGI.escape(project)}/pipelines",
      query: pipeline_query_params,
      unauthenticated: api_token.nil?
    ).auto_paginate do |pipeline|
      job = named_job_in(pipeline.id)
      return job if job # rubocop:disable Cop/AvoidReturnFromBlocks
    end

    raise 'Job not found!'
  end

  # Searches only the pipeline given through :pipeline_id.
  def find_job_in_pipeline
    return unless pipeline_id

    named_job_in(pipeline_id) || raise('Job not found!')
  end

  # Returns the first job of the given pipeline whose name is `job_name`, or
  # nil when the pipeline has no such job.
  def named_job_in(id)
    Gitlab.get(
      "/projects/#{CGI.escape(project)}/pipelines/#{id}/jobs",
      query: job_query_params,
      unauthenticated: api_token.nil?
    ).auto_paginate do |job|
      return job if job.name == job_name # rubocop:disable Cop/AvoidReturnFromBlocks
    end

    nil
  end

  # `merge` rather than `**` because queries parsed from the command line have
  # String keys.
  def pipeline_query_params
    @pipeline_query_params ||= { per_page: 100 }.merge(pipeline_query)
  end

  def job_query_params
    @job_query_params ||= { per_page: 100 }.merge(job_query)
  end
end
# Command-line entry point: runs only when this file is executed directly, so
# that other scripts can require the file just to get JobFinder.
if $PROGRAM_NAME == __FILE__
  # `dup` would be shallow: copy the nested query hashes too, so that the
  # options collected below never leak into the hashes held by DEFAULT_OPTIONS.
  options = JobFinder::DEFAULT_OPTIONS.transform_values { |value| value.is_a?(Hash) ? value.dup : value }

  # Turns "key=value" into { "key" => "value" }. Only the first `=` separates
  # the key from the value, so values may themselves contain `=`.
  parse_query = lambda do |raw|
    key, value = raw.split('=', 2)
    raise OptionParser::InvalidArgument, "expected KEY=VALUE, got #{raw.inspect}" if key.to_s.empty? || value.nil?

    { key => value }
  end

  OptionParser.new do |opts|
    opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
      options[:project] = value
    end

    opts.on("-i", "--pipeline-id pipeline_id", String, "A pipeline ID (defaults to $CI_PIPELINE_ID)") do |value|
      options[:pipeline_id] = value
    end

    # Both query flags may be repeated; every occurrence adds one parameter.
    opts.on("-q", "--pipeline-query pipeline_query", String, "Query to pass to the Pipeline API request") do |value|
      options[:pipeline_query].merge!(parse_query.call(value))
    end

    opts.on("-Q", "--job-query job_query", String, "Query to pass to the Job API request") do |value|
      options[:job_query].merge!(parse_query.call(value))
    end

    opts.on("-j", "--job-name job_name", String, "A job name that needs to exist in the found pipeline") do |value|
      options[:job_name] = value
    end

    opts.on("-t", "--api-token API_TOKEN", String, "A valid API token with the `read_api` scope") do |value|
      options[:api_token] = value
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end.parse!

  job = JobFinder.new(options).execute

  # Print the job ID on stdout; print nothing when no search could be run.
  puts job.id if job
end
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
require_relative 'get_job_id'
# Starts ("plays") a manual CI job, looked up by name in a pipeline.
class PlayJob
  # Fallback option values, read from the CI job environment.
  DEFAULT_OPTIONS = {
    project: ENV['CI_PROJECT_ID'],
    pipeline_id: ENV['CI_PIPELINE_ID'],
    api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
  }.freeze

  # @param options [Hash] accepts :project, :pipeline_id, :job_name and
  #   :api_token; :project is removed from the hash that is passed in.
  # @raise [KeyError] when :api_token is absent from the options
  def initialize(options)
    @project = options.delete(:project)
    @options = options

    Gitlab.configure do |config|
      config.endpoint = 'https://gitlab.com/api/v4'
      config.private_token = options.fetch(:api_token)
    end
  end

  # Finds the manual job named :job_name in the pipeline and plays it.
  #
  # @return whatever `Gitlab.job_play` returns
  def execute
    # JobFinder takes a single options hash: the project travels inside it and
    # the `manual` scope belongs to the job query. The empty pipeline query
    # makes the finder search the pipeline given by :pipeline_id.
    finder_options = options
      .slice(:api_token, :pipeline_id, :job_name)
      .merge(project: project, pipeline_query: {}, job_query: { scope: 'manual' })

    job = JobFinder.new(finder_options).execute

    Gitlab.job_play(project, job.id)
  end

  private

  attr_reader :project, :options
end
# Command-line entry point: runs only when this file is executed directly, so
# that requiring the file just defines PlayJob.
if $PROGRAM_NAME == __FILE__
  # Start from the environment-derived defaults; flags below override them.
  options = PlayJob::DEFAULT_OPTIONS.dup

  OptionParser.new do |opts|
    opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
      options[:project] = value
    end

    opts.on("-j", "--job-name JOB_NAME", String, "A job name that needs to exist in the found pipeline") do |value|
      options[:job_name] = value
    end

    # Playing a job is a write operation, so a read-only token is not enough.
    opts.on("-t", "--api-token API_TOKEN", String, "A valid API token with the `api` scope") do |value|
      options[:api_token] = value
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end.parse!

  PlayJob.new(options).execute
end
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'gitlab'
require 'optparse'
#
# Configure credentials to be used with gitlab gem
#
# The endpoint is fixed to gitlab.com and the token is read from the
# GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN environment variable.
Gitlab.configure do |config|
config.endpoint = 'https://gitlab.com/api/v4'
config.private_token = ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
end
# Options collected from the command line; only --scope is recognised.
options = {}
# `parse!` removes the flags it recognises from ARGV, which leaves only the
# positional arguments for the code that reads ARGV afterwards.
OptionParser.new do |opts|
opts.on("-s", "--scope=SCOPE", "Find job with matching scope") do |scope|
options[:scope] = scope
end
end.parse!
# Finds a job by name among the jobs of one pipeline, using the `Gitlab` API
# client.
class PipelineJobFinder
  # @param project_id the project that owns the pipeline
  # @param pipeline_id the pipeline whose jobs are searched
  # @param job_name [String] exact name of the job to find
  # @param options [Hash] extra options handed to `Gitlab.pipeline_jobs`
  def initialize(project_id, pipeline_id, job_name, options)
    @project_id = project_id
    @pipeline_id = pipeline_id
    @job_name = job_name
    @options = options
  end

  # @return the first job named `job_name`, or nil when there is none
  def execute
    Gitlab.pipeline_jobs(@project_id, @pipeline_id, @options).auto_paginate do |job|
      return job if job.name == @job_name
    end

    # Be explicit about "not found": the value `auto_paginate` itself returns
    # after a full iteration is not a job and must not reach the caller, which
    # checks the result for nil.
    nil
  end
end
# Positional arguments, in order: project ID, pipeline ID, job name.
project_id, pipeline_id, job_name = ARGV
job = PipelineJobFinder.new(project_id, pipeline_id, job_name, options).execute
# Print nothing when the finder returned nil; otherwise print the job ID.
return if job.nil?
puts job.id
#!/usr/bin/env bash
# Makes sure the Knapsack and flaky-examples report files exist locally,
# downloading each one when it is missing and falling back to an empty JSON
# object ("{}") when a download fails.
# NOTE(review): this body appears to interleave the pre-change and post-change
# lines of a diff (two `mkdir -p` calls, a `wget` from S3 directly followed by
# `scripts/api/download_job_artifact` for the same file) — confirm which lines
# belong to the current version before relying on it.
function retrieve_tests_metadata() {
mkdir -p knapsack/ rspec_flaky/ rspec_profiling/
mkdir -p crystalball/ knapsack/ rspec_flaky/ rspec_profiling/
local project_path="gitlab-org/gitlab"
local test_metadata_job_id
# Ruby
# Looks up the ID of an `update-tests-metadata` job, filtering pipelines by
# status=success, ref=master and username=gitlab-bot, and jobs by scope=success.
test_metadata_job_id=$(scripts/api/get_job_id --project "${project_path}" -q "status=success" -q "ref=master" -q "username=gitlab-bot" -Q "scope=success" --job-name "update-tests-metadata")
# Knapsack report: only fetched when the file is not already present.
if [[ ! -f "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
fi
# Flaky examples report: same logic as for the Knapsack report.
if [[ ! -f "${FLAKY_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${FLAKY_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
fi
# FIXME: We will need to find a pipeline where the $RSPEC_PACKED_TESTS_MAPPING_PATH.gz actually exists (Crystalball only runs every two-hours, but the `update-tests-metadata` runs for all `master` pipelines...).
# if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
# (scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
# fi
#
# scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
function update_tests_metadata() {
echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
scripts/merge-reports "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" knapsack/rspec*.json
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uplaoding report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f knapsack/rspec*.json
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
export FLAKY_RSPEC_GENERATE_REPORT="true"
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
scripts/flaky_examples/prune-old-flaky-examples "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
if [[ -n ${TESTS_METADATA_S3_BUCKET} ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
......@@ -48,16 +43,6 @@ function update_tests_metadata() {
fi
}
# Ensures the packed tests mapping exists locally (downloading and unzipping it
# from the metadata S3 bucket, or falling back to "{}"), then unpacks it into
# ${RSPEC_TESTS_MAPPING_PATH}.
function retrieve_tests_mapping() {
  mkdir -p crystalball/

  if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
    # Download and decompression succeed or fail together; any failure leaves
    # an empty mapping behind.
    if ! (wget -O "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz"); then
      echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
    fi
  fi

  scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
function update_tests_mapping() {
if ! crystalball_rspec_data_exists; then
echo "No crystalball rspec data found."
......@@ -65,20 +50,9 @@ function update_tests_mapping() {
fi
scripts/generate-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" crystalball/rspec*.yml
scripts/pack-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
gzip "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f crystalball/rspec*.yml
rm -f crystalball/rspec*.yml "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
}
function crystalball_rspec_data_exists() {
......
......@@ -87,65 +87,14 @@ function echosuccess() {
fi
}
# Prints the ID of the job named $1 in the current pipeline
# (${CI_PROJECT_ID} / ${CI_PIPELINE_ID}).
#   $1 - job name
#   $2 - optional extra query string for the jobs API (e.g. "scope=manual")
# The token comes from $API_TOKEN, or from
# $GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN when $API_TOKEN is unset.
# At most three pages of 100 jobs are scanned.
function get_job_id() {
  local job_name="${1}"
  local extra_query="${2:+&${2}}"
  local token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"

  if [[ -z "${token}" ]]; then
    echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
    return
  fi

  local max_page=3
  local page jobs_url job_id

  for ((page = 1; page <= max_page; page++)); do
    jobs_url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/jobs?per_page=100&page=${page}${extra_query}"
    echoinfo "GET ${jobs_url}"

    job_id=$(curl --silent --show-error --header "PRIVATE-TOKEN: ${token}" "${jobs_url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")

    # Stop at the first page that yields something other than "null".
    if [[ "${job_id}" != "null" ]]; then
      break
    fi
  done

  # jq prints "null" for non-existent attribute
  if [[ "${job_id}" != "null" ]]; then
    echoinfo "The '${job_name}' job ID is ${job_id}"
    echo "${job_id}"
  else
    echoerr "The '${job_name}' job ID couldn't be retrieved!"
  fi
}
# Starts the manual job named $1 of the current pipeline through the GitLab
# API. Returns quietly when the job ID cannot be determined.
function play_job() {
  local job_name="${1}"

  local manual_job_id
  manual_job_id=$(get_job_id "${job_name}" "scope=manual")
  if [[ -z "${manual_job_id}" ]]; then
    return
  fi

  local token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
  if [[ -z "${token}" ]]; then
    echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
    return
  fi

  local play_url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/jobs/${manual_job_id}/play"
  echoinfo "POST ${play_url}"

  # The response's `web_url` field is reported back to the user.
  local started_job_url
  started_job_url=$(curl --silent --show-error --request POST --header "PRIVATE-TOKEN: ${token}" "${play_url}" | jq ".web_url")
  echoinfo "Manual job '${job_name}' started at: ${started_job_url}"
}
# Cancels the current pipeline, unless it contains a successful
# `dont-interrupt-me` job, in which case the pipeline is left running.
# NOTE(review): this body appears to interleave the pre-change and post-change
# lines of a diff (the job ID is assigned twice, and both a raw `curl` cancel
# request and `scripts/api/cancel_pipeline` are present) — confirm which lines
# belong to the current version before relying on it.
function fail_pipeline_early() {
local dont_interrupt_me_job_id
dont_interrupt_me_job_id=$(get_job_id 'dont-interrupt-me' 'scope=success')
dont_interrupt_me_job_id=$(scripts/api/get_job_id --job-query "scope=success" --job-name "dont-interrupt-me")
# A non-empty ID means a successful `dont-interrupt-me` job was found.
if [[ -n "${dont_interrupt_me_job_id}" ]]; then
echoinfo "This pipeline cannot be interrupted due to \`dont-interrupt-me\` job ${dont_interrupt_me_job_id}"
else
echoinfo "Failing pipeline early for fast feedback due to test failures in rspec fail-fast."
curl --request POST --header "PRIVATE-TOKEN: ${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}" "https://${CI_SERVER_HOST}/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/cancel"
scripts/api/cancel_pipeline
fi
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment