Commit d4b10599 authored by Rémy Coutable

Improve and simplify the distribution of static analysis tasks

Signed-off-by: Rémy Coutable <remy@rymai.me>
parent 0bdd20eb
...@@ -14,38 +14,49 @@ class StaticAnalysis ...@@ -14,38 +14,49 @@ class StaticAnalysis
"Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`" "Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`"
].freeze ].freeze
# One static-analysis command together with how long it is expected to take.
#
# command  - Array of String words making up the command line
#            (e.g. %w[bin/rake lint:haml]).
# duration - Expected run time; reported by the runner as seconds.
Task = Struct.new(:command, :duration) do
  # Returns the command as a single printable String, with its words
  # separated by one space.
  def cmd
    command.join(' ')
  end
end
# The set of tasks assigned to one CI node.
#
# index          - Node number (callers in this file pass a 1-based value).
# tasks          - Array of objects responding to #duration.
# total_duration - Accepted by the constructor, but the reader below shadows
#                  the generated accessor, so the stored value is never
#                  returned by #total_duration.
NodeAssignment = Struct.new(:index, :tasks, :total_duration) do
  # Returns the sum of the durations of all assigned tasks; 0 when the node
  # has no tasks, since summing an empty Array yields 0.
  def total_duration
    tasks.sum(&:duration)
  end
end
# `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations # `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations
# (e.g. gitlab-org/gitlab-foss) since they test against a single # (e.g. gitlab-org/gitlab-foss) since they test against a single
# file that is generated by an EE installation, which can # file that is generated by an EE installation, which can
# contain values that a FOSS installation won't find. To work # contain values that a FOSS installation won't find. To work
# around this we will only enable this task on EE installations. # around this we will only enable this task on EE installations.
TASKS_WITH_DURATIONS_SECONDS = { TASKS_WITH_DURATIONS_SECONDS = [
%w[bin/rake lint:haml] => 800, Task.new(%w[bin/rake lint:haml], 562),
# We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used, # We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used,
# the cache would prevent these files from being created. # the cache would prevent these files from being created.
%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false] => 600, Task.new(%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false], 800),
(Gitlab.ee? ? %w[bin/rake gettext:updated_check] : nil) => 360, (Gitlab.ee? ? Task.new(%w[bin/rake gettext:updated_check], 360) : nil),
%w[yarn run lint:eslint:all] => 312, Task.new(%w[yarn run lint:eslint:all], 312),
%w[bundle exec rubocop --parallel] => 300, Task.new(%w[bundle exec rubocop --parallel], 60),
%w[yarn run lint:prettier] => 162, Task.new(%w[yarn run lint:prettier], 160),
%w[bin/rake gettext:lint] => 65, Task.new(%w[bin/rake gettext:lint], 85),
%w[bundle exec license_finder] => 61, Task.new(%w[bundle exec license_finder], 20),
%w[bin/rake lint:static_verification] => 45, Task.new(%w[bin/rake lint:static_verification], 35),
%w[bin/rake config_lint] => 26, Task.new(%w[bin/rake config_lint], 10),
%w[bin/rake gitlab:sidekiq:all_queues_yml:check] => 15, Task.new(%w[bin/rake gitlab:sidekiq:all_queues_yml:check], 15),
(Gitlab.ee? ? %w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check] : nil) => 11, (Gitlab.ee? ? Task.new(%w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check], 11) : nil),
%w[yarn run internal:stylelint] => 8, Task.new(%w[yarn run internal:stylelint], 8),
%w[scripts/lint-conflicts.sh] => 1, Task.new(%w[scripts/lint-conflicts.sh], 1),
%w[yarn run block-dependencies] => 1, Task.new(%w[yarn run block-dependencies], 1),
%w[scripts/lint-rugged] => 1, Task.new(%w[scripts/lint-rugged], 1),
%w[scripts/gemfile_lock_changed.sh] => 1, Task.new(%w[scripts/gemfile_lock_changed.sh], 1),
%w[scripts/frontend/check_no_partial_karma_jest.sh] => 1 Task.new(%w[scripts/frontend/check_no_partial_karma_jest.sh], 1)
}.reject { |k| k.nil? }.freeze ].reject { |t| t.nil? }.freeze
StaticAnalysisTasks = Struct.new(:tasks, :duration)
def run_tasks!(options = {}) def run_tasks!(options = {})
node_tasks = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i, debug: options[:debug])[(ENV['CI_NODE_INDEX'] || 1).to_i - 1] node_assignment = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i)[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]
if options[:dry_run] if options[:dry_run]
puts "Dry-run mode!" puts "Dry-run mode!"
...@@ -53,19 +64,21 @@ class StaticAnalysis ...@@ -53,19 +64,21 @@ class StaticAnalysis
end end
static_analysis = Gitlab::Popen::Runner.new static_analysis = Gitlab::Popen::Runner.new
start_time = Time.now
static_analysis.run(node_tasks.tasks) do |cmd, &run| static_analysis.run(node_assignment.tasks.map(&:command)) do |command, &run|
task = node_assignment.tasks.find { |task| task.command == command }
puts puts
puts "$ #{cmd.join(' ')}" puts "$ #{task.cmd}"
result = run.call result = run.call
puts "==> Finished in #{result.duration} seconds" puts "==> Finished in #{result.duration} seconds (expected #{task.duration} seconds)"
puts puts
end end
puts puts
puts '===================================================' puts '==================================================='
puts "Node finished running all tasks in #{Time.now - start_time} seconds (expected #{node_assignment.total_duration})"
puts puts
puts puts
...@@ -114,49 +127,57 @@ class StaticAnalysis ...@@ -114,49 +127,57 @@ class StaticAnalysis
.count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) } .count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) }
end end
def tasks_to_run(node_total, debug: false) def tasks_to_run(node_total)
tasks_per_node = Array.new(node_total) { StaticAnalysisTasks.new([], 0) } total_time = TASKS_WITH_DURATIONS_SECONDS.sum(&:duration).to_f
total_time = TASKS_WITH_DURATIONS_SECONDS.values.sum.to_f
ideal_time_per_job = total_time / node_total ideal_time_per_job = total_time / node_total
tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |a| -a[1] }.to_h tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |a| -a.duration }
nodes = Array.new(node_total) { |i| NodeAssignment.new(i + 1, [], 0) }
p "total_time: #{total_time}" if debug
p "ideal_time_per_job: #{ideal_time_per_job}" if debug puts "Total expected time: #{total_time}; ideal time per job: #{ideal_time_per_job}.\n\n"
puts "Tasks to distribute:"
tasks_by_duration_desc.each_with_index do |(task, duration), i| tasks_by_duration_desc.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
puts "Assigning #{task}..." if debug
(0...node_total).each do |node_index|
puts "Current node: #{node_index}..." if debug
# Task is already longer than the ideal time
if duration >= ideal_time_per_job && tasks_per_node[node_index].tasks.empty?
puts "Assigning #{task} to node #{node_index} (#{duration}s)." if debug
assign_task_to_node(tasks_by_duration_desc, tasks_per_node[node_index], task, duration)
break
elsif tasks_per_node[node_index].duration + duration <= ideal_time_per_job
puts "Assigning #{task} to node #{node_index} (#{duration}s)." if debug
assign_task_to_node(tasks_by_duration_desc, tasks_per_node[node_index], task, duration)
break
else
puts "Node #{node_index} is already full (#{tasks_per_node[node_index]})" if debug
end
end
end
raise "There are unassigned tasks: #{tasks_by_duration_desc}" unless tasks_by_duration_desc.empty? # Distribute tasks optimally first
puts "\nAssigning tasks optimally."
distribute_tasks(tasks_by_duration_desc, nodes, ideal_time_per_job: ideal_time_per_job)
tasks_per_node.each_with_index do |node, i| # Distribute remaining tasks, ordered by ascending duration
puts "\nExpected duration for node #{i + 1}: #{node.duration}" leftover_tasks = tasks_by_duration_desc - nodes.flat_map(&:tasks)
node.tasks.each { |task| puts "- #{task.join(' ')}" }
if leftover_tasks.any?
puts "\n\nAssigning remaining tasks: #{leftover_tasks.flat_map(&:cmd)}"
distribute_tasks(leftover_tasks, nodes.sort_by { |node| node.total_duration })
end
nodes.each do |node|
puts "\nExpected duration for node #{node.index}: #{node.total_duration} seconds"
node.tasks.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
end end
tasks_per_node nodes
end end
def assign_task_to_node(remaining_tasks, node, task_name, duration) def distribute_tasks(tasks, nodes, ideal_time_per_job: nil)
node.tasks << task_name condition =
node.duration += duration if ideal_time_per_job
remaining_tasks.delete(task_name) ->(task, node, ideal_time_per_job) { (task.duration + node.total_duration) <= ideal_time_per_job }
else
->(*) { true }
end
tasks.each do |task|
nodes.each do |node|
if condition.call(task, node, ideal_time_per_job)
assign_task_to_node(tasks, node, task)
break
end
end
end
end
def assign_task_to_node(remaining_tasks, node, task)
node.tasks << task
puts "Assigning #{task.command} (#{task.duration}s) to node ##{node.index}. Node total duration: #{node.total_duration}s."
end end
end end
...@@ -167,9 +188,5 @@ if $0 == __FILE__ ...@@ -167,9 +188,5 @@ if $0 == __FILE__
options[:dry_run] = true options[:dry_run] = true
end end
if ARGV.include?('--debug')
options[:debug] = true
end
StaticAnalysis.new.run_tasks!(options) StaticAnalysis.new.run_tasks!(options)
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment