Commit 68774b23 authored by charlieablett's avatar charlieablett

Populate canonical emails

Background migration to generate a canonical email based
on the user's primary email. "Canonical" means the Agent
part of the email address omitting `.` and anything after
any `+`. Only applies to Gmail since they are a service
that allows `.` and ignores anything after `+` in the Agent
and all variations arrive in the same inbox.
parent c0bc7b5b
# frozen_string_literal: true
class PopulateCanonicalEmails < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
class User < ActiveRecord::Base
include EachBatch
self.table_name = 'users'
scope :with_gmail, -> { select(:id, :email).where("email ILIKE '%gmail.com'") }
end
# Limited to *@gmail.com addresses only as a first iteration, because we know
# Gmail ignores `.` appearing in the Agent name, as well as anything after `+`
def up
# batch size is the default, 1000
migration = Gitlab::BackgroundMigration::PopulateCanonicalEmails
migration_name = migration.to_s.demodulize
queue_background_migration_jobs_by_range_at_intervals(
User.with_gmail,
migration_name,
1.minute)
end
def down
# no-op
end
end
...@@ -13007,6 +13007,7 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -13007,6 +13007,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200311165635 20200311165635
20200311192351 20200311192351
20200311214912 20200311214912
20200312053852
20200312125121 20200312125121
20200312160532 20200312160532
20200312163407 20200312163407
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Class to populate new rows of UserCanonicalEmail based on existing email addresses
class PopulateCanonicalEmails
def perform(start_id, stop_id)
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO
user_canonical_emails (
user_id,
canonical_email,
created_at,
updated_at
)
SELECT users.id AS user_id,
concat(translate(split_part(split_part(users.email, '@', 1), '+', 1), '.', ''), '@gmail.com') AS canonical_email,
NOW() AS created_at,
NOW() AS updated_at
FROM users
WHERE users.email ILIKE '%@gmail.com'
AND users.id BETWEEN #{start_id} AND #{stop_id}
ON CONFLICT DO NOTHING;
SQL
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::PopulateCanonicalEmails, :migration, schema: 20200312053852 do
let(:migration) { described_class.new }
let_it_be(:users_table) { table(:users) }
let_it_be(:user_canonical_emails_table) { table(:user_canonical_emails) }
let_it_be(:users) { users_table.all }
let_it_be(:user_canonical_emails) { user_canonical_emails_table.all }
subject { migration.perform(1, 1) }
describe 'gmail users' do
using RSpec::Parameterized::TableSyntax
where(:original_email, :expected_result) do
'legitimateuser@gmail.com' | 'legitimateuser@gmail.com'
'userwithplus+somestuff@gmail.com' | 'userwithplus@gmail.com'
'user.with.periods@gmail.com' | 'userwithperiods@gmail.com'
'user.with.periods.and.plus+someotherstuff@gmail.com' | 'userwithperiodsandplus@gmail.com'
end
with_them do
it 'generates the correct canonical email' do
create_user(email: original_email, id: 1)
subject
result = canonical_emails
expect(result.count).to eq 1
expect(result.first).to match({
'user_id' => 1,
'canonical_email' => expected_result
})
end
end
end
describe 'non gmail.com domain users' do
%w[
legitimateuser@somedomain.com
userwithplus+somestuff@other.com
user.with.periods@gmail.org
user.with.periods.and.plus+someotherstuff@orangmail.com
].each do |non_gmail_address|
it 'does not generate a canonical email' do
create_user(email: non_gmail_address, id: 1)
subject
expect(canonical_emails(user_id: 1).count).to eq 0
end
end
end
describe 'gracefully handles missing records' do
specify { expect { subject }.not_to raise_error }
end
describe 'gracefully handles existing records, some of which may have an already-existing identical canonical_email field' do
let_it_be(:user_one) { create_user(email: "example.user@gmail.com", id: 1) }
let_it_be(:user_two) { create_user(email: "exampleuser@gmail.com", id: 2) }
let_it_be(:user_email_one) { user_canonical_emails.create(canonical_email: "exampleuser@gmail.com", user_id: user_one.id) }
subject { migration.perform(1, 2) }
it 'only creates one record' do
subject
expect(canonical_emails.count).not_to be_nil
end
end
def create_user(attributes)
default_attributes = {
projects_limit: 0
}
users.create(default_attributes.merge!(attributes))
end
def canonical_emails(user_id: nil)
filter_by_id = user_id ? "WHERE user_id = #{user_id}" : ""
ApplicationRecord.connection.execute <<~SQL
SELECT canonical_email, user_id
FROM user_canonical_emails
#{filter_by_id};
SQL
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment