Commit b1006279 authored by Lin Jen-Shin

Merge branch 'better-distribute-static-analysis-tasks' into 'master'

ci: Distribute static-analysis tasks more intelligently

See merge request gitlab-org/gitlab!67052
parents 3d00d899 ddcd4b58
...@@ -14,51 +14,71 @@ class StaticAnalysis ...@@ -14,51 +14,71 @@ class StaticAnalysis
"Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`" "Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`"
].freeze ].freeze
# A static-analysis command paired with its expected run time.
#
# command  - Array of String arguments making up the command line.
# duration - Expected duration in seconds.
Task = Struct.new(:command, :duration) do
  # Returns the command as a single space-separated String for display.
  def cmd
    command.join(' ')
  end
end
# The set of tasks assigned to one CI node.
#
# index - Node number (nodes are created with 1-based indexes).
# tasks - Array of task objects responding to #duration.
NodeAssignment = Struct.new(:index, :tasks) do
  # Returns the summed expected duration of all assigned tasks
  # (0 when no tasks are assigned).
  def total_duration
    tasks.sum { |task| task.duration }
  end
end
# `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations # `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations
# (e.g. gitlab-org/gitlab-foss) since they test against a single # (e.g. gitlab-org/gitlab-foss) since they test against a single
# file that is generated by an EE installation, which can # file that is generated by an EE installation, which can
# contain values that a FOSS installation won't find. To work # contain values that a FOSS installation won't find. To work
# around this we will only enable this task on EE installations. # around this we will only enable this task on EE installations.
TASKS_BY_DURATIONS_SECONDS_DESC = { TASKS_WITH_DURATIONS_SECONDS = [
%w[bin/rake lint:haml] => 800, Task.new(%w[bin/rake lint:haml], 562),
# We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used, # We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used,
# the cache would prevent these files from being created. # the cache would prevent these files from being created.
%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false] => 600, Task.new(%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false], 800),
(Gitlab.ee? ? %w[bin/rake gettext:updated_check] : nil) => 360, (Gitlab.ee? ? Task.new(%w[bin/rake gettext:updated_check], 360) : nil),
%w[yarn run lint:eslint:all] => 312, Task.new(%w[yarn run lint:eslint:all], 312),
%w[yarn run lint:prettier] => 162, Task.new(%w[bundle exec rubocop --parallel], 60),
%w[bin/rake gettext:lint] => 65, Task.new(%w[yarn run lint:prettier], 160),
%w[bundle exec license_finder] => 61, Task.new(%w[bin/rake gettext:lint], 85),
%w[bin/rake lint:static_verification] => 45, Task.new(%w[bundle exec license_finder], 20),
%w[bundle exec rubocop --parallel] => 40, Task.new(%w[bin/rake lint:static_verification], 35),
%w[bin/rake config_lint] => 26, Task.new(%w[bin/rake config_lint], 10),
%w[bin/rake gitlab:sidekiq:all_queues_yml:check] => 15, Task.new(%w[bin/rake gitlab:sidekiq:all_queues_yml:check], 15),
(Gitlab.ee? ? %w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check] : nil) => 11, (Gitlab.ee? ? Task.new(%w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check], 11) : nil),
%w[yarn run internal:stylelint] => 8, Task.new(%w[yarn run internal:stylelint], 8),
%w[scripts/lint-conflicts.sh] => 1, Task.new(%w[scripts/lint-conflicts.sh], 1),
%w[yarn run block-dependencies] => 1, Task.new(%w[yarn run block-dependencies], 1),
%w[scripts/lint-rugged] => 1, Task.new(%w[scripts/lint-rugged], 1),
%w[scripts/gemfile_lock_changed.sh] => 1, Task.new(%w[scripts/gemfile_lock_changed.sh], 1),
%w[scripts/frontend/check_no_partial_karma_jest.sh] => 1 Task.new(%w[scripts/frontend/check_no_partial_karma_jest.sh], 1)
}.reject { |k| k.nil? }.sort_by { |a| -a[1] }.to_h.keys.freeze ].compact.freeze
def run_tasks! def run_tasks!(options = {})
tasks = tasks_to_run((ENV['CI_NODE_INDEX'] || 1).to_i, (ENV['CI_NODE_TOTAL'] || 1).to_i) node_assignment = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i)[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]
if options[:dry_run]
puts "Dry-run mode!"
return
end
static_analysis = Gitlab::Popen::Runner.new static_analysis = Gitlab::Popen::Runner.new
start_time = Time.now
static_analysis.run(tasks) do |cmd, &run| static_analysis.run(node_assignment.tasks.map(&:command)) do |command, &run|
task = node_assignment.tasks.find { |task| task.command == command }
puts puts
puts "$ #{cmd.join(' ')}" puts "$ #{task.cmd}"
result = run.call result = run.call
puts "==> Finished in #{result.duration} seconds" puts "==> Finished in #{result.duration} seconds (expected #{task.duration} seconds)"
puts puts
end end
puts puts
puts '===================================================' puts '==================================================='
puts "Node finished running all tasks in #{Time.now - start_time} seconds (expected #{node_assignment.total_duration})"
puts puts
puts puts
...@@ -107,16 +127,66 @@ class StaticAnalysis ...@@ -107,16 +127,66 @@ class StaticAnalysis
.count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) } .count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) }
end end
def tasks_to_run(node_index, node_total) def tasks_to_run(node_total)
tasks = [] total_time = TASKS_WITH_DURATIONS_SECONDS.sum(&:duration).to_f
TASKS_BY_DURATIONS_SECONDS_DESC.each_with_index do |task, i| ideal_time_per_node = total_time / node_total
tasks << task if i % node_total == (node_index - 1) tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |a| -a.duration }
nodes = Array.new(node_total) { |i| NodeAssignment.new(i + 1, []) }
puts "Total expected time: #{total_time}; ideal time per job: #{ideal_time_per_node}.\n\n"
puts "Tasks to distribute:"
tasks_by_duration_desc.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
# Distribute tasks optimally first
puts "\nAssigning tasks optimally."
distribute_tasks(tasks_by_duration_desc, nodes, ideal_time_per_node: ideal_time_per_node)
# Distribute remaining tasks, ordered by ascending duration
leftover_tasks = tasks_by_duration_desc - nodes.flat_map(&:tasks)
if leftover_tasks.any?
puts "\n\nAssigning remaining tasks: #{leftover_tasks.flat_map(&:cmd)}"
distribute_tasks(leftover_tasks, nodes.sort_by { |node| node.total_duration })
end
nodes.each do |node|
puts "\nExpected duration for node #{node.index}: #{node.total_duration} seconds"
node.tasks.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
end
nodes
end
# Assigns each task to one of the given nodes.
#
# tasks               - Array of tasks to place, visited in the given order.
# nodes               - Array of NodeAssignment candidates.
# ideal_time_per_node - When given, a task is only placed on the first node
#                       whose total stays at or below this budget; tasks that
#                       fit nowhere are left unassigned for the caller to
#                       handle. When nil, every task is placed on whichever
#                       node currently has the smallest total duration.
def distribute_tasks(tasks, nodes, ideal_time_per_node: nil)
  tasks.each do |task|
    target =
      if ideal_time_per_node
        nodes.find { |node| task.duration + node.total_duration <= ideal_time_per_node }
      else
        # Re-evaluate the least-loaded node for every task. Always taking the
        # first node of a pre-sorted list would pile all remaining tasks onto
        # a single node, because the order goes stale after one assignment.
        nodes.min_by(&:total_duration)
      end

    assign_task_to_node(tasks, target, task) if target
  end
end
tasks def assign_task_to_node(remaining_tasks, node, task)
node.tasks << task
puts "Assigning #{task.command} (#{task.duration}s) to node ##{node.index}. Node total duration: #{node.total_duration}s."
end end
end end
if $0 == __FILE__ if $0 == __FILE__
StaticAnalysis.new.run_tasks! options = {}
if ARGV.include?('--dry-run')
options[:dry_run] = true
end
StaticAnalysis.new.run_tasks!(options)
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment