Commit 6837ce38 authored by Dmytro Zaporozhets, committed by Robert Speicher

Add Product Analytics collector

Rack application within Rails to record product analytics events.
Signed-off-by: Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>
parent aff2c0b0
...@@ -8,6 +8,8 @@ class ProductAnalyticsEvent < ApplicationRecord ...@@ -8,6 +8,8 @@ class ProductAnalyticsEvent < ApplicationRecord
belongs_to :project belongs_to :project
validates :event_id, :project_id, :v_collector, :v_etl, presence: true
# There is no default Rails timestamps in the table. # There is no default Rails timestamps in the table.
# collector_tstamp is a timestamp when a collector recorded an event. # collector_tstamp is a timestamp when a collector recorded an event.
scope :order_by_time, -> { order(collector_tstamp: :desc) } scope :order_by_time, -> { order(collector_tstamp: :desc) }
......
...@@ -68,6 +68,15 @@ class Rack::Attack ...@@ -68,6 +68,15 @@ class Rack::Attack
end end
end end
# The product analytics feature is still experimental, so the number of
# events accepted per application is capped: 100 requests per 60 seconds.
# Requests are bucketed by the `aid` (application id) parameter; requests
# that do not target the collector yield nil as the discriminator.
throttle('throttle_product_analytics_collector', limit: 100, period: 60) do |req|
  req.params['aid'] if req.product_analytics_collector_request?
end
throttle('throttle_authenticated_web', Gitlab::Throttle.authenticated_web_options) do |req| throttle('throttle_authenticated_web', Gitlab::Throttle.authenticated_web_options) do |req|
if req.web_request? && if req.web_request? &&
Gitlab::Throttle.settings.throttle_authenticated_web_enabled Gitlab::Throttle.settings.throttle_authenticated_web_enabled
...@@ -128,6 +137,10 @@ class Rack::Attack ...@@ -128,6 +137,10 @@ class Rack::Attack
path =~ %r{^/-/(health|liveness|readiness)} path =~ %r{^/-/(health|liveness|readiness)}
end end
# True when the request path begins with the product analytics collector
# prefix ('/-/collector/i').
def product_analytics_collector_request?
  collector_prefix = '/-/collector/i'
  path.start_with?(collector_prefix)
end
def should_be_skipped? def should_be_skipped?
api_internal_request? || health_check_request? api_internal_request? || health_check_request?
end end
......
require 'sidekiq/web' require 'sidekiq/web'
require 'sidekiq/cron/web' require 'sidekiq/cron/web'
require 'product_analytics/collector_app'
Rails.application.routes.draw do Rails.application.routes.draw do
concern :access_requestable do concern :access_requestable do
...@@ -176,6 +177,9 @@ Rails.application.routes.draw do ...@@ -176,6 +177,9 @@ Rails.application.routes.draw do
# Used by third parties to verify CI_JOB_JWT, placeholder route # Used by third parties to verify CI_JOB_JWT, placeholder route
# in case we decide to move away from doorkeeper-openid_connect # in case we decide to move away from doorkeeper-openid_connect
get 'jwks' => 'doorkeeper/openid_connect/discovery#keys' get 'jwks' => 'doorkeeper/openid_connect/discovery#keys'
# Product analytics collector
match '/collector/i', to: ProductAnalytics::CollectorApp.new, via: :all
end end
# End of the /-/ scope. # End of the /-/ scope.
......
# frozen_string_literal: true
module ProductAnalytics
  # Rack application that records product analytics events.
  #
  # Every handled outcome is answered with an empty body: 200 when the
  # event was saved, 404 when required params are missing, the project
  # cannot be found, the feature is disabled for it, or saving fails.
  class CollectorApp
    # Rack entry point.
    #
    # env - the Rack environment Hash of the incoming request.
    #
    # Returns a Rack response triplet ([status, headers, body]).
    def call(env)
      request = Rack::Request.new(env)
      params = request.params

      return not_found unless EventParams.has_required_params?(params)

      # Product analytics feature is behind a flag and is disabled by default.
      # We expect limited amount of projects with this feature enabled in first release.
      # Since collector has no authentication we temporary prevent recording of events
      # for project without the feature enabled. During increase of feature adoption, this
      # check will be removed for better performance.
      project = Project.find(params['aid'].to_i)
      return not_found unless Feature.enabled?(:product_analytics, project, default_enabled: false)

      # Snowplow tracker has own format of events.
      # We need to convert them to match the schema of our database.
      event_params = EventParams.parse_event_params(params)

      # `create` hands back the record whether or not it was saved, which is
      # always truthy. `save` returns false on failure, so the 404 branch is
      # actually reachable.
      if ProductAnalyticsEvent.new(event_params).save
        ok
      else
        not_found
      end
    rescue ActiveRecord::InvalidForeignKey, ActiveRecord::RecordNotFound
      # Unknown project id: treated the same as any other rejected event.
      not_found
    end

    # Empty 200 response: the event was recorded.
    def ok
      [200, {}, []]
    end

    # Empty 404 response: the event was rejected.
    def not_found
      [404, {}, []]
    end
  end
end
# frozen_string_literal: true
module ProductAnalytics
  # Maps the query params sent by a Snowplow tracker onto the attribute
  # names of the GitLab ProductAnalyticsEvent model. Attribute names follow
  # the Snowplow event model; project_id is the only GitLab specific one.
  #
  # Field reference:
  # * Snowplow tracker protocol: https://github.com/snowplow/snowplow/wiki/snowplow-tracker-protocol
  # * Canonical event model: https://github.com/snowplow/snowplow/wiki/canonical-event-model
  class EventParams
    class << self
      # Builds the attributes Hash for a ProductAnalyticsEvent.
      #
      # params - Hash of raw tracker params keyed by String.
      #
      # Returns a Hash keyed by Symbol. Tracker values are copied as-is,
      # except the browser feature/cookie flags, which go through
      # Gitlab::Utils.to_boolean. collector_tstamp, v_collector and v_etl
      # are filled in here rather than read from params.
      def parse_event_params(params)
        flag = ->(key) { Gitlab::Utils.to_boolean(params[key]) }

        {
          project_id: params['aid'],
          platform: params['p'],
          collector_tstamp: Time.zone.now,
          event_id: params['eid'],
          v_tracker: params['tv'],
          v_collector: Gitlab::VERSION,
          v_etl: Gitlab::VERSION,
          os_timezone: params['tz'],
          name_tracker: params['tna'],
          br_lang: params['lang'],
          doc_charset: params['cs'],
          br_features_pdf: flag.call('f_pdf'),
          br_features_flash: flag.call('f_fla'),
          br_features_java: flag.call('f_java'),
          br_features_director: flag.call('f_dir'),
          br_features_quicktime: flag.call('f_qt'),
          br_features_realplayer: flag.call('f_realp'),
          br_features_windowsmedia: flag.call('f_wma'),
          br_features_gears: flag.call('f_gears'),
          br_features_silverlight: flag.call('f_ag'),
          br_colordepth: params['cd'],
          br_cookies: flag.call('cookie'),
          dvce_created_tstamp: params['dtm'],
          # NOTE(review): 'vp' looks like the whole viewport size in the
          # tracker protocol, yet it is stored as a height - confirm.
          br_viewheight: params['vp'],
          domain_sessionidx: params['vid'],
          domain_sessionid: params['sid'],
          domain_userid: params['duid'],
          user_fingerprint: params['fp'],
          page_referrer: params['refr'],
          page_url: params['url']
        }
      end

      # Returns true only when both the application id ('aid') and the
      # event id ('eid') are present in params.
      def has_required_params?(params)
        %w[aid eid].all? { |key| params[key].present? }
      end
    end
  end
end
{
"aid":"1",
"p":"web",
"tna":"sp",
"tv":"js-2.14.0",
"eid":"fbf14096-74ee-47e4-883c-8a0d6cb72e37",
"duid":"79543c31-cfc3-4479-a737-fafb9333c8ba",
"sid":"54f6d3f3-f4f9-4fdc-87e0-a2c775234c1b",
"vid":4,
"url":"http://example.com/products/1",
"refr":"http://example.com/products/1",
"lang":"en-US",
"cookie":"1",
"tz":"America/Los_Angeles",
"cs":"UTF-8"
}
# frozen_string_literal: true
require 'spec_helper'

RSpec.describe ProductAnalytics::EventParams do
  describe '.parse_event_params' do
    # Raw tracker payload loaded from the JSON fixture.
    let(:raw_event) { Gitlab::Json.parse(fixture_file('product_analytics/event.json')) }

    subject { described_class.parse_event_params(raw_event) }

    it 'extracts all params from raw event' do
      # Only fixture-derived attributes are checked; `include` tolerates the
      # extra keys (timestamps, versions, unset fields) in the result.
      is_expected.to include(
        project_id: '1',
        platform: 'web',
        name_tracker: 'sp',
        v_tracker: 'js-2.14.0',
        event_id: 'fbf14096-74ee-47e4-883c-8a0d6cb72e37',
        domain_userid: '79543c31-cfc3-4479-a737-fafb9333c8ba',
        domain_sessionid: '54f6d3f3-f4f9-4fdc-87e0-a2c775234c1b',
        domain_sessionidx: 4,
        page_url: 'http://example.com/products/1',
        page_referrer: 'http://example.com/products/1',
        br_lang: 'en-US',
        br_cookies: true,
        os_timezone: 'America/Los_Angeles',
        doc_charset: 'UTF-8'
      )
    end
  end

  describe '.has_required_params?' do
    subject { described_class.has_required_params?(params) }

    context 'aid and eid are present' do
      let(:params) { { 'aid' => 1, 'eid' => 2 } }

      it { is_expected.to be_truthy }
    end

    context 'aid and eid are missing' do
      let(:params) { {} }

      it { is_expected.to be_falsey }
    end

    context 'eid is missing' do
      let(:params) { { 'aid' => 1 } }

      it { is_expected.to be_falsey }
    end
  end
end
...@@ -5,6 +5,13 @@ RSpec.describe ProductAnalyticsEvent, type: :model do ...@@ -5,6 +5,13 @@ RSpec.describe ProductAnalyticsEvent, type: :model do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
it { expect(described_class).to respond_to(:order_by_time) } it { expect(described_class).to respond_to(:order_by_time) }
describe 'validations' do
  # One presence example per attribute that the model requires.
  %i[project_id event_id v_collector v_etl].each do |attribute|
    it { is_expected.to validate_presence_of(attribute) }
  end
end
describe '.timerange' do describe '.timerange' do
let_it_be(:event_1) { create(:product_analytics_event, collector_tstamp: Time.zone.now - 1.day) } let_it_be(:event_1) { create(:product_analytics_event, collector_tstamp: Time.zone.now - 1.day) }
let_it_be(:event_2) { create(:product_analytics_event, collector_tstamp: Time.zone.now - 5.days) } let_it_be(:event_2) { create(:product_analytics_event, collector_tstamp: Time.zone.now - 5.days) }
......
# frozen_string_literal: true
require 'spec_helper'

RSpec.describe 'ProductAnalytics::CollectorApp throttle' do
  include RackAttackSpecHelpers

  include_context 'rack attack cache store'

  let(:project1) { create(:project) }
  let(:project2) { create(:project) }

  before do
    allow(ProductAnalyticsEvent).to receive(:create).and_return(true)
  end

  context 'per application id' do
    let(:params) { { aid: project1.id, eid: SecureRandom.uuid } }

    it 'throttles the endpoint' do
      collector_path = '/-/collector/i'

      # The first 100 requests stay within the limit and must succeed.
      1.upto(100) do
        expect_ok { get collector_path, params: params }
      end

      # Switch the client IP to show the limit is not keyed on the address.
      random_next_ip

      # Request number 101 for the same aid is rejected...
      expect_rejection { get collector_path, params: params }

      # ...while a different aid still has its own untouched budget.
      expect_ok { get collector_path, params: params.merge(aid: project2.id) }
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'

# Request specs for the product analytics collector endpoint.
RSpec.describe 'ProductAnalytics::CollectorApp' do
  let_it_be(:project) { create(:project) }
  let(:params) { {} }

  subject { get '/-/collector/i', params: params }

  # A rejected request must not store an event and must answer with 404.
  RSpec.shared_examples 'not found' do
    it 'responds with 404' do
      expect { subject }.not_to change { ProductAnalyticsEvent.count }

      expect(response).to have_gitlab_http_status(:not_found)
    end
  end

  context 'correct event params' do
    let(:params) do
      {
        aid: project.id,
        p: 'web',
        tna: 'sp',
        tv: 'js-2.14.0',
        eid: SecureRandom.uuid,
        duid: SecureRandom.uuid,
        sid: SecureRandom.uuid,
        vid: 4,
        url: 'http://example.com/products/1',
        refr: 'http://example.com/products/1',
        lang: 'en-US',
        cookie: true,
        tz: 'America/Los_Angeles',
        cs: 'UTF-8'
      }
    end

    it 'responds with 200' do
      expect { subject }.to change { ProductAnalyticsEvent.count }.by(1)

      expect(response).to have_gitlab_http_status(:ok)
    end

    context 'feature disabled' do
      before do
        stub_feature_flags(product_analytics: false)
      end

      it_behaves_like 'not found'
    end
  end

  # The default params are empty, so the required aid/eid are missing.
  context 'empty event params' do
    it_behaves_like 'not found'
  end

  context 'invalid project id in params' do
    let(:params) do
      {
        aid: '-1',
        p: 'web',
        tna: 'sp',
        tv: 'js-2.14.0',
        eid: SecureRandom.uuid,
        duid: SecureRandom.uuid,
        sid: SecureRandom.uuid
      }
    end

    it_behaves_like 'not found'
  end
end
...@@ -30,4 +30,16 @@ module RackAttackSpecHelpers ...@@ -30,4 +30,16 @@ module RackAttackSpecHelpers
expect(response).to have_gitlab_http_status(:too_many_requests) expect(response).to have_gitlab_http_status(:too_many_requests)
end end
# Runs the given block (expected to perform a request) and asserts that
# the resulting response has the :ok status.
def expect_ok(&block)
yield
expect(response).to have_gitlab_http_status(:ok)
end
# Stubs `ip` on an upcoming Rack::Attack::Request so it reports an
# FFaker-generated IPv4 address instead of the real client address.
# NOTE(review): presumably only the next instance created is affected,
# going by the helper's name - confirm against allow_next_instance_of.
def random_next_ip
allow_next_instance_of(Rack::Attack::Request) do |instance|
allow(instance).to receive(:ip).and_return(FFaker::Internet.ip_v4_address)
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment