Commit 81188b39 authored by Stan Hu

Pseudonymizer: Gracefully handle empty pseudo entries

https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/8213
removed the pseudo fields for all entries, but the code was not
updated to handle entries whose pseudo list is missing (nil).

Discovered in https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/5744
parent c7abcd31
---
title: 'Pseudonymizer: Gracefully handle empty pseudo entries'
merge_request: 9044
author:
type: fixed
...@@ -86,7 +86,7 @@ module Pseudonymizer ...@@ -86,7 +86,7 @@ module Pseudonymizer
type_results = type_results.map do |c| type_results = type_results.map do |c|
data_type = c.sql_type data_type = c.sql_type
if table_config[:pseudo].include?(c.name) if table_config[:pseudo]&.include?(c.name)
data_type = "character varying" data_type = "character varying"
end end
......
...@@ -27,6 +27,8 @@ module Pseudonymizer ...@@ -27,6 +27,8 @@ module Pseudonymizer
private private
def pseudo_fields(whitelisted, pseudonymized) def pseudo_fields(whitelisted, pseudonymized)
return [] unless pseudonymized
pseudo_extra_fields = pseudonymized - whitelisted pseudo_extra_fields = pseudonymized - whitelisted
pseudo_extra_fields.each do |field| pseudo_extra_fields.each do |field|
Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.") Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
......
...@@ -18,24 +18,13 @@ describe Pseudonymizer::Dumper do ...@@ -18,24 +18,13 @@ describe Pseudonymizer::Dumper do
FileUtils.rm_rf(base_dir) FileUtils.rm_rf(base_dir)
end end
describe 'Pseudo tables' do describe '#tables_to_csv' do
it 'outputs project tables to csv' do let(:column_names) { %w(id name path description) }
column_names = %w(id name path description)
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id)
}
}
expect(pseudo.output_dir).to eq(base_dir)
# grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0]
expect(project_table_file).to end_with("projects.csv.gz")
def decode_project_csv(project_table_file)
columns = [] columns = []
project_data = [] project_data = []
Zlib::GzipReader.open(project_table_file) do |gz| Zlib::GzipReader.open(project_table_file) do |gz|
csv = CSV.new(gz, headers: true) csv = CSV.new(gz, headers: true)
# csv.shift # read the header row # csv.shift # read the header row
...@@ -43,26 +32,70 @@ describe Pseudonymizer::Dumper do ...@@ -43,26 +32,70 @@ describe Pseudonymizer::Dumper do
columns = csv.headers columns = csv.headers
end end
# check if CSV columns are correct [columns, project_data]
expect(columns).to include(*column_names) end
context 'with nil pseudo fields' do
before do
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: nil
}
}
end
it 'outputs valid values' do
project_table_file = pseudo.tables_to_csv[0]
columns, project_data = decode_project_csv(project_table_file)
# check if CSV columns are correct
expect(columns).to include(*column_names)
# is it pseudonymous column_names.each do |column|
# sha 256 is 64 chars in length expect(project_data[column].to_s).to eq(project[column].to_s)
expect(project_data["id"].length).to eq(64) end
end
end end
it "warns when pseudonymized fields are extraneous" do context 'with pseudo fields' do
column_names = %w(id name path description) it 'outputs project tables to csv' do
pseudo.config[:tables] = { pseudo.config[:tables] = {
projects: { projects: {
whitelist: column_names, whitelist: column_names,
pseudo: %w(id extraneous) pseudo: %w(id)
}
}
expect(pseudo.output_dir).to eq(base_dir)
# grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0]
expect(project_table_file).to end_with("projects.csv.gz")
columns, project_data = decode_project_csv(project_table_file)
# check if CSV columns are correct
expect(columns).to include(*column_names)
# is it pseudonymous
# sha 256 is 64 chars in length
expect(project_data["id"].length).to eq(64)
end
it "warns when pseudonymized fields are extraneous" do
column_names = %w(id name path description)
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
pseudo: %w(id extraneous)
}
} }
}
expect(Rails.logger).to receive(:warn).with(/extraneous/) expect(Rails.logger).to receive(:warn).with(/extraneous/)
pseudo.tables_to_csv pseudo.tables_to_csv
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment