Commit bde39322 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Add a linter for PO files

parent 1eb30cfb
......@@ -349,6 +349,8 @@ group :development, :test do
gem 'activerecord_sane_schema_dumper', '0.2'
gem 'stackprof', '~> 0.2.10', require: false
gem 'simple_po_parser', '~> 1.1.2', require: false
end
group :test do
......
......@@ -833,6 +833,7 @@ GEM
faraday (~> 0.9)
jwt (~> 1.5)
multi_json (~> 1.10)
simple_po_parser (1.1.2)
simplecov (0.14.1)
docile (~> 1.1.0)
json (>= 1.8, < 3)
......@@ -1145,6 +1146,7 @@ DEPENDENCIES
sidekiq (~> 5.0)
sidekiq-cron (~> 0.6.0)
sidekiq-limit_fetch (~> 3.4)
simple_po_parser (~> 1.1.2)
simplecov (~> 0.14.0)
slack-notifier (~> 1.5.1)
spinach-rails (~> 0.2.1)
......
require 'simple_po_parser'
module Gitlab
class PoLinter
attr_reader :po_path, :entries, :locale
VARIABLE_REGEX = /%{\w*}|%[a-z]/.freeze
def initialize(po_path, locale = I18n.locale.to_s)
@po_path = po_path
@locale = locale
end
def errors
@errors ||= validate_po
end
def validate_po
if parse_error = parse_po
return 'PO-syntax errors' => [parse_error]
end
validate_entries
end
def parse_po
@entries = SimplePoParser.parse(po_path)
nil
rescue SimplePoParser::ParserError => e
@entries = []
e.message
end
def validate_entries
errors = {}
entries.each do |entry|
# Skip validation of metadata
next if entry[:msgid].empty?
errors_for_entry = validate_entry(entry)
errors[join_message(entry[:msgid])] = errors_for_entry if errors_for_entry.any?
end
errors
end
def validate_entry(entry)
errors = []
validate_flags(errors, entry)
validate_variables(errors, entry)
validate_newlines(errors, entry)
errors
end
def validate_newlines(errors, entry)
message_id = join_message(entry[:msgid])
if entry[:msgid].is_a?(Array)
errors << "<#{message_id}> is defined over multiple lines, this breaks some tooling."
end
end
def validate_variables(errors, entry)
if entry[:msgid_plural].present?
validate_variables_in_message(errors, entry[:msgid], entry['msgstr[0]'])
validate_variables_in_message(errors, entry[:msgid_plural], entry['msgstr[1]'])
else
validate_variables_in_message(errors, entry[:msgid], entry[:msgstr])
end
end
def validate_variables_in_message(errors, message_id, message_translation)
message_id = join_message(message_id)
required_variables = message_id.scan(VARIABLE_REGEX)
validate_unnamed_variables(errors, required_variables)
validate_translation(errors, message_id, required_variables)
message_translation = join_message(message_translation)
unless message_translation.empty?
validate_variable_usage(errors, message_translation, required_variables)
end
end
def validate_translation(errors, message_id, used_variables)
variables = fill_in_variables(used_variables)
begin
Gitlab::I18n.with_locale(locale) do
translated = if message_id.include?('|')
FastGettext::Translation.s_(message_id)
else
FastGettext::Translation._(message_id)
end
translated % variables
end
# `sprintf` could raise an `ArgumentError` when invalid passing something
# other than a Hash when using named variables
#
# `sprintf` could raise `TypeError` when passing a wrong type when using
# unnamed variables
#
# FastGettext::Translation could raise `RuntimeError` (raised as a string),
# or as subclassess `NoTextDomainConfigured` & `InvalidFormat`
#
# `FastGettext::Translation` could raise `ArgumentError` as subclassess
# `InvalidEncoding`, `IllegalSequence` & `InvalidCharacter`
rescue ArgumentError, TypeError, RuntimeError => e
errors << "Failure translating to #{locale} with #{variables}: #{e.message}"
end
end
def fill_in_variables(variables)
if variables.empty?
[]
elsif variables.any? { |variable| unnamed_variable?(variable) }
variables.map do |variable|
variable == '%d' ? Random.rand(1000) : Gitlab::Utils.random_string
end
else
variables.inject({}) do |hash, variable|
variable_name = variable[/\w+/]
hash[variable_name] = Gitlab::Utils.random_string
hash
end
end
end
def validate_unnamed_variables(errors, variables)
if variables.size > 1 && variables.any? { |variable_name| unnamed_variable?(variable_name) }
errors << 'is combining multiple unnamed variables'
end
end
def validate_variable_usage(errors, translation, required_variables)
found_variables = translation.scan(VARIABLE_REGEX)
missing_variables = required_variables - found_variables
if missing_variables.any?
errors << "<#{translation}> is missing: [#{missing_variables.to_sentence}]"
end
unknown_variables = found_variables - required_variables
if unknown_variables.any?
errors << "<#{translation}> is using unknown variables: [#{unknown_variables.to_sentence}]"
end
end
def unnamed_variable?(variable_name)
!variable_name.start_with?('%{')
end
def validate_flags(errors, entry)
if flag = entry[:flag]
errors << "is marked #{flag}"
end
end
def join_message(message)
Array(message).join
end
end
end
......@@ -42,5 +42,9 @@ module Gitlab
'No'
end
end
def random_string
Random.rand(Float::MAX.to_i).to_s(36)
end
end
end
......@@ -19,4 +19,44 @@ namespace :gettext do
Rake::Task['gettext:pack'].invoke
Rake::Task['gettext:po_to_json'].invoke
end
desc 'Lint all po files in `locale/'
task lint: :environment do
FastGettext.silence_errors
files = Dir.glob(Rails.root.join('locale/*/gitlab.po'))
linters = files.map do |file|
locale = File.basename(File.dirname(file))
Gitlab::PoLinter.new(file, locale)
end
pot_file = Rails.root.join('locale/gitlab.pot')
linters.unshift(Gitlab::PoLinter.new(pot_file))
failed_linters = linters.select { |linter| linter.errors.any? }
if failed_linters.empty?
puts 'All PO files are valid.'
else
failed_linters.each do |linter|
report_errors_for_file(linter.po_path, linter.errors)
end
raise "Not all PO-files are valid: #{failed_linters.map(&:po_path).to_sentence}"
end
end
def report_errors_for_file(file, errors_for_file)
puts "Errors in `#{file}`:"
errors_for_file.each do |message_id, errors|
puts " #{message_id}"
errors.each do |error|
spaces = ' ' * 4
error = error.lines.join("#{spaces}")
puts "#{spaces}#{error}"
end
end
end
end
......@@ -12,7 +12,8 @@ tasks = [
%w[bundle exec license_finder],
%w[yarn run eslint],
%w[bundle exec rubocop --require rubocop-rspec],
%w[scripts/lint-conflicts.sh]
%w[scripts/lint-conflicts.sh],
%w[bundle exec rake gettext:lint]
]
failed_tasks = tasks.reduce({}) do |failures, task|
......
# Spanish translations for gitlab package.
# Copyright (C) 2017 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the gitlab package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
#
msgid ""
msgstr ""
"Project-Id-Version: gitlab 1.0.0\n"
"Report-Msgid-Bugs-To: \n"
"PO-Revision-Date: 2017-07-12 12:35-0500\n"
"Language-Team: Spanish\n"
"Language: es\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"Last-Translator: Bob Van Landuyt <bob@gitlab.com>\n"
"X-Generator: Poedit 2.0.2\n"
msgid "1 commit"
msgid_plural "%d commits"
msgstr[0] "1 cambio"
msgstr[1] "%d cambios"
#, fuzzy
msgid "PipelineSchedules|Remove variable row"
msgstr "Схема"
# Spanish translations for gitlab package.
# Copyright (C) 2017 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the gitlab package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
#
msgid ""
msgstr ""
"Project-Id-Version: gitlab 1.0.0\n"
"Report-Msgid-Bugs-To: \n"
"PO-Revision-Date: 2017-07-12 12:35-0500\n"
"Language-Team: Spanish\n"
"Language: es\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"Last-Translator: Bob Van Landuyt <bob@gitlab.com>\n"
"X-Generator: Poedit 2.0.2\n"
msgid "%d commit"
msgid_plural "%d commits"
msgstr[0] "%d cambio"
msgstr[1] "%d cambios"
But this doesn't even look like an PO-entry
\ No newline at end of file
# Spanish translations for gitlab package.
# Copyright (C) 2017 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the gitlab package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2017.
#
msgid ""
msgstr ""
"Project-Id-Version: gitlab 1.0.0\n"
"Report-Msgid-Bugs-To: \n"
"PO-Revision-Date: 2017-07-12 12:35-0500\n"
"Language-Team: Spanish\n"
"Language: es\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"Last-Translator: Bob Van Landuyt <bob@gitlab.com>\n"
"X-Generator: Poedit 2.0.2\n"
msgid "1 commit"
msgid_plural "%d commits"
msgstr[0] "1 cambio"
msgstr[1] "%d cambios"
msgid ""
"You are going to remove %{group_name}.\n"
"Removed groups CANNOT be restored!\n"
"Are you ABSOLUTELY sure?"
msgstr ""
"Va a eliminar %{group_name}.\n"
"¡El grupo eliminado NO puede ser restaurado!\n"
"¿Estás TOTALMENTE seguro?"
This diff is collapsed.
require 'spec_helper'
describe Gitlab::PoLinter do
let(:linter) { described_class.new(po_path) }
let(:po_path) { 'spec/fixtures/valid.po' }
describe '#errors' do
it 'only calls validation once' do
expect(linter).to receive(:validate_po).once.and_call_original
2.times { linter.errors }
end
end
describe '#validate_po' do
subject(:errors) { linter.validate_po }
context 'for a fuzzy message' do
let(:po_path) { 'spec/fixtures/fuzzy.po' }
it 'has an error' do
is_expected.to include('PipelineSchedules|Remove variable row' => ['is marked fuzzy'])
end
end
context 'for a translations with newlines' do
let(:po_path) { 'spec/fixtures/newlines.po' }
it 'has an error' do
message_id = "You are going to remove %{group_name}.\\nRemoved groups CANNOT be restored!\\nAre you ABSOLUTELY sure?"
expected_message = "<#{message_id}> is defined over multiple lines, this breaks some tooling."
is_expected.to include(message_id => [expected_message])
end
end
context 'with an invalid po' do
let(:po_path) { 'spec/fixtures/invalid.po' }
it 'returns the error' do
is_expected.to include('PO-syntax errors' => a_kind_of(Array))
end
it 'does not validate entries' do
expect(linter).not_to receive(:validate_entries)
linter.validate_po
end
end
context 'with a valid po' do
it 'parses the file' do
expect(linter).to receive(:parse_po).and_call_original
linter.validate_po
end
it 'validates the entries' do
expect(linter).to receive(:validate_entries).and_call_original
linter.validate_po
end
it 'has no errors' do
is_expected.to be_empty
end
end
end
describe '#parse_po' do
context 'with a valid po' do
it 'fills in the entries' do
linter.parse_po
expect(linter.entries).not_to be_empty
end
it 'does not have errors' do
expect(linter.parse_po).to be_nil
end
end
context 'with an invalid po' do
let(:po_path) { 'spec/fixtures/invalid.po' }
it 'contains an error' do
expect(linter.parse_po).not_to be_nil
end
it 'sets the entries to an empty array' do
linter.parse_po
expect(linter.entries).to eq([])
end
end
end
describe '#validate_entries' do
it 'skips entries without a `msgid`' do
allow(linter).to receive(:entries) { [{ msgid: "" }] }
expect(linter.validate_entries).to be_empty
end
it 'keeps track of errors for entries' do
fake_invalid_entry = { msgid: "Hello %{world}", msgstr: "Bonjour %{monde}" }
allow(linter).to receive(:entries) { [fake_invalid_entry] }
expect(linter).to receive(:validate_entry)
.with(fake_invalid_entry)
.and_call_original
expect(linter.validate_entries).to include("Hello %{world}" => an_instance_of(Array))
end
end
describe '#validate_entry' do
it 'validates the flags, variable usage, and newlines' do
fake_entry = double
expect(linter).to receive(:validate_flags).with([], fake_entry)
expect(linter).to receive(:validate_variables).with([], fake_entry)
expect(linter).to receive(:validate_newlines).with([], fake_entry)
linter.validate_entry(fake_entry)
end
end
describe '#validate_variables' do
it 'validates both signular and plural in a pluralized string' do
pluralized_entry = {
msgid: 'Hello %{world}',
msgid_plural: 'Hello all %{world}',
'msgstr[0]' => 'Bonjour %{world}',
'msgstr[1]' => 'Bonjour tous %{world}'
}
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello %{world}', 'Bonjour %{world}')
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello all %{world}', 'Bonjour tous %{world}')
linter.validate_variables([], pluralized_entry)
end
it 'validates the message variables' do
entry = { msgid: 'Hello', msgstr: 'Bonjour' }
expect(linter).to receive(:validate_variables_in_message)
.with([], 'Hello', 'Bonjour')
linter.validate_variables([], entry)
end
end
describe '#validate_variables_in_message' do
it 'detects when a variables are used incorrectly' do
errors = []
expected_errors = ['<hello %{world} %d> is missing: [%{hello}]',
'<hello %{world} %d> is using unknown variables: [%{world}]',
'is combining multiple unnamed variables']
linter.validate_variables_in_message(errors, '%{hello} world %d', 'hello %{world} %d')
expect(errors).to include(*expected_errors)
end
end
describe '#validate_translation' do
it 'succeeds with valid variables' do
errors = []
linter.validate_translation(errors, 'Hello %{world}', ['%{world}'])
expect(errors).to be_empty
end
it 'adds an error message when translating fails' do
errors = []
expect(FastGettext::Translation).to receive(:_) { raise 'broken' }
linter.validate_translation(errors, 'Hello', [])
expect(errors).to include('Failure translating to en with []: broken')
end
it 'adds an error message when translating fails when translating with context' do
errors = []
expect(FastGettext::Translation).to receive(:s_) { raise 'broken' }
linter.validate_translation(errors, 'Tests|Hello', [])
expect(errors).to include('Failure translating to en with []: broken')
end
it "adds an error when trying to translate with incorrect variables when using unnamed variables" do
errors = []
linter.validate_translation(errors, 'Hello %d', ['%s'])
expect(errors.first).to start_with("Failure translating to en with")
end
it "adds an error when trying to translate with named variables when unnamed variables are expected" do
errors = []
linter.validate_translation(errors, 'Hello %d', ['%{world}'])
expect(errors.first).to start_with("Failure translating to en with")
end
it 'adds an error when translated with incorrect variables using named variables' do
errors = []
linter.validate_translation(errors, 'Hello %{thing}', ['%d'])
expect(errors.first).to start_with("Failure translating to en with")
end
end
describe '#fill_in_variables' do
it 'builds an array for %d translations' do
result = linter.fill_in_variables(['%d'])
expect(result).to contain_exactly(a_kind_of(Integer))
end
it 'builds an array for %s translations' do
result = linter.fill_in_variables(['%s'])
expect(result).to contain_exactly(a_kind_of(String))
end
it 'builds a hash for named variables' do
result = linter.fill_in_variables(['%{hello}'])
expect(result).to be_a(Hash)
expect(result).to include('hello' => an_instance_of(String))
end
end
end
require 'spec_helper'
describe Gitlab::Utils do
delegate :to_boolean, :boolean_to_yes_no, :slugify, to: :described_class
delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, to: :described_class
describe '.slugify' do
{
......@@ -53,4 +53,10 @@ describe Gitlab::Utils do
expect(boolean_to_yes_no(false)).to eq('No')
end
end
describe '.random_string' do
it 'generates a string' do
expect(random_string).to be_kind_of(String)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment