Commit 81188b39 authored by Stan Hu

Pseudonymizer: Gracefully handle empty pseudo entries

https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/8213
removed the pseudo fields for all entries, but the code was not
updated to handle this.

Discovered in https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/5744
parent c7abcd31
---
title: 'Pseudonymizer: Gracefully handle empty pseudo entries'
merge_request: 9044
author:
type: fixed
......@@ -86,7 +86,7 @@ module Pseudonymizer
type_results = type_results.map do |c|
data_type = c.sql_type
if table_config[:pseudo].include?(c.name)
if table_config[:pseudo]&.include?(c.name)
data_type = "character varying"
end
......
......@@ -27,6 +27,8 @@ module Pseudonymizer
private
def pseudo_fields(whitelisted, pseudonymized)
return [] unless pseudonymized
pseudo_extra_fields = pseudonymized - whitelisted
pseudo_extra_fields.each do |field|
Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
......
......@@ -18,24 +18,13 @@ describe Pseudonymizer::Dumper do
FileUtils.rm_rf(base_dir)
end
describe 'Pseudo tables' do
it 'outputs project tables to csv' do
column_names = %w(id name path description)
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id)
}
}
expect(pseudo.output_dir).to eq(base_dir)
# grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0]
expect(project_table_file).to end_with("projects.csv.gz")
describe '#tables_to_csv' do
let(:column_names) { %w(id name path description) }
def decode_project_csv(project_table_file)
columns = []
project_data = []
Zlib::GzipReader.open(project_table_file) do |gz|
csv = CSV.new(gz, headers: true)
# csv.shift # read the header row
......@@ -43,26 +32,70 @@ describe Pseudonymizer::Dumper do
columns = csv.headers
end
# check if CSV columns are correct
expect(columns).to include(*column_names)
[columns, project_data]
end
context 'with nil pseudo fields' do
before do
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: nil
}
}
end
it 'outputs valid values' do
project_table_file = pseudo.tables_to_csv[0]
columns, project_data = decode_project_csv(project_table_file)
# check if CSV columns are correct
expect(columns).to include(*column_names)
# is it pseudonymous
# sha 256 is 64 chars in length
expect(project_data["id"].length).to eq(64)
column_names.each do |column|
expect(project_data[column].to_s).to eq(project[column].to_s)
end
end
end
it "warns when pseudonymized fields are extraneous" do
column_names = %w(id name path description)
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id extraneous)
context 'with pseudo fields' do
it 'outputs project tables to csv' do
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id)
}
}
expect(pseudo.output_dir).to eq(base_dir)
# grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0]
expect(project_table_file).to end_with("projects.csv.gz")
columns, project_data = decode_project_csv(project_table_file)
# check if CSV columns are correct
expect(columns).to include(*column_names)
# is it pseudonymous
# sha 256 is 64 chars in length
expect(project_data["id"].length).to eq(64)
end
it "warns when pseudonymized fields are extraneous" do
column_names = %w(id name path description)
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id extraneous)
}
}
}
expect(Rails.logger).to receive(:warn).with(/extraneous/)
expect(Rails.logger).to receive(:warn).with(/extraneous/)
pseudo.tables_to_csv
pseudo.tables_to_csv
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment