Commit 5e0683e9 authored by Robert Speicher

Merge branch 'sh-fix-pseudonymizer' into 'master'

Pseudonymizer: Gracefully handle empty pseudo entries

See merge request gitlab-org/gitlab-ee!9044
parents a29ae2bb 81188b39
---
title: 'Pseudonymizer: Gracefully handle empty pseudo entries'
merge_request: 9044
author:
type: fixed
...@@ -86,7 +86,7 @@ module Pseudonymizer ...@@ -86,7 +86,7 @@ module Pseudonymizer
type_results = type_results.map do |c| type_results = type_results.map do |c|
data_type = c.sql_type data_type = c.sql_type
if table_config[:pseudo].include?(c.name) if table_config[:pseudo]&.include?(c.name)
data_type = "character varying" data_type = "character varying"
end end
......
...@@ -27,6 +27,8 @@ module Pseudonymizer ...@@ -27,6 +27,8 @@ module Pseudonymizer
private private
def pseudo_fields(whitelisted, pseudonymized) def pseudo_fields(whitelisted, pseudonymized)
return [] unless pseudonymized
pseudo_extra_fields = pseudonymized - whitelisted pseudo_extra_fields = pseudonymized - whitelisted
pseudo_extra_fields.each do |field| pseudo_extra_fields.each do |field|
Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.") Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
......
...@@ -18,9 +18,48 @@ describe Pseudonymizer::Dumper do ...@@ -18,9 +18,48 @@ describe Pseudonymizer::Dumper do
FileUtils.rm_rf(base_dir) FileUtils.rm_rf(base_dir)
end end
describe 'Pseudo tables' do describe '#tables_to_csv' do
let(:column_names) { %w(id name path description) }
# Reads a gzip-compressed CSV dump and returns its header names together with
# the first data row.
#
# @param project_table_file [String] path to the gzipped CSV file
# @return [Array] two-element array: the CSV headers, then the first row
def decode_project_csv(project_table_file)
  Zlib::GzipReader.open(project_table_file) do |gzip_stream|
    reader = CSV.new(gzip_stream, headers: true)
    # Pull the first row before asking for headers: with `headers: true` the
    # header line is only parsed once a row has been read.
    first_row = reader.shift
    [reader.headers, first_row]
  end
end
# Covers a table configuration whose :pseudo entry is nil (no fields listed):
# the dump is expected to still produce a CSV with the whitelisted columns.
context 'with nil pseudo fields' do
before do
# Replace the table config with a single `projects` entry.
pseudo.config[:tables] = {
projects: {
whitelist: column_names,
# Deliberately nil rather than an empty array.
pseudo: nil
}
}
end
it 'outputs valid values' do
# The first entry returned by tables_to_csv is used as the projects dump.
project_table_file = pseudo.tables_to_csv[0]
columns, project_data = decode_project_csv(project_table_file)
# check if CSV columns are correct
expect(columns).to include(*column_names)
# Each whitelisted column in the first CSV row must equal the corresponding
# value on `project` (defined outside this excerpt), compared as strings.
column_names.each do |column|
expect(project_data[column].to_s).to eq(project[column].to_s)
end
end
end
context 'with pseudo fields' do
it 'outputs project tables to csv' do it 'outputs project tables to csv' do
column_names = %w(id name path description)
pseudo.config[:tables] = { pseudo.config[:tables] = {
projects: { projects: {
whitelist: column_names, whitelist: column_names,
...@@ -34,14 +73,7 @@ describe Pseudonymizer::Dumper do ...@@ -34,14 +73,7 @@ describe Pseudonymizer::Dumper do
project_table_file = pseudo.tables_to_csv[0] project_table_file = pseudo.tables_to_csv[0]
expect(project_table_file).to end_with("projects.csv.gz") expect(project_table_file).to end_with("projects.csv.gz")
columns = [] columns, project_data = decode_project_csv(project_table_file)
project_data = []
Zlib::GzipReader.open(project_table_file) do |gz|
csv = CSV.new(gz, headers: true)
# csv.shift # read the header row
project_data = csv.gets
columns = csv.headers
end
# check if CSV columns are correct # check if CSV columns are correct
expect(columns).to include(*column_names) expect(columns).to include(*column_names)
...@@ -65,6 +97,7 @@ describe Pseudonymizer::Dumper do ...@@ -65,6 +97,7 @@ describe Pseudonymizer::Dumper do
pseudo.tables_to_csv pseudo.tables_to_csv
end end
end end
end
describe "manifest is valid" do describe "manifest is valid" do
it "all tables exist" do it "all tables exist" do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment