Commit 18092378 authored by Jérome Perrin's avatar Jérome Perrin

Gitlab: use frontends' authenticate-to-backend and rate limit archive downloads

Instead of using a list of frontends IP addresses to determine if the backend can trust the frontend's `X-Forwarded-For` header, use the same [`authenticate-to-backend`](https://lab.nexedi.com/nexedi/slapos/-/blob/d48d682dfc67d7845f0346f01772573c9e4edc8e/software/rapid-cdn/instance-slave-input-schema.json#L215-223) approach as with ERP5: the frontend connects to the backend with a client certificate and if the backend can verify this certificate, it trusts `X-Forwarded-For` from the frontend and uses this as client IP.
Otherwise, without a verified certificate, the frontend's own IP address is used as the client IP.

This means that:
 - frontend shared instances must use `authenticate-to-backend` in parameters
 - gitlab instance must use `frontend-caucase-url-list` in parameters
 - gitlab instance no longer uses `nginx_real_ip_trusted_addresses` in parameters

This branch also contains a mitigation for the 503 errors we observed when too many clients were downloading archives (we had several hundred ongoing requests preparing archives). The approach is simply to rate-limit archive downloads; this is implemented in nginx because gitlab does not expose rack-attack configuration for this.

See merge request nexedi/slapos!1676
parents 3eebea89 cd78aec9
......@@ -14,7 +14,7 @@
# not need these here).
[instance.cfg]
filename = instance.cfg.in
md5sum = 3ffdd78aeb77ab581c51ce419176dd37
md5sum = 3607ea995293975a736be136f0cdf675
[watcher]
_update_hash_filename_ = watcher.in
......@@ -34,7 +34,7 @@ md5sum = c559a24ab6281268b608ed3bccb8e4ce
[gitlab-parameters.cfg]
_update_hash_filename_ = gitlab-parameters.cfg
md5sum = 95b18789111ed239146d243e39ffefbe
md5sum = 16b25d654fe1f219a78d8a3da16b07dd
[gitlab-shell-config.yml.in]
_update_hash_filename_ = template/gitlab-shell-config.yml.in
......@@ -54,7 +54,7 @@ md5sum = d769ea27820e932c596c35bbbf3f2902
[instance-gitlab.cfg.in]
_update_hash_filename_ = instance-gitlab.cfg.in
md5sum = 6d8d20ded84622339d49c60b0e61380c
md5sum = e17bfe96bb9a0f4666d90c877a663e88
[instance-gitlab-export.cfg.in]
_update_hash_filename_ = instance-gitlab-export.cfg.in
......@@ -66,7 +66,7 @@ md5sum = 70612697434bf4fbe838fdf4fd867ed8
[nginx-gitlab-http.conf.in]
_update_hash_filename_ = template/nginx-gitlab-http.conf.in
md5sum = 4980c1571a4dd7753aaa60d065270849
md5sum = b40b6d7948f4a54c45f2ecbb7e3d7a36
[nginx.conf.in]
_update_hash_filename_ = template/nginx.conf.in
......
......@@ -96,7 +96,7 @@ configuration.nginx_proxy_connect_timeout = 300
# nginx advanced
configuration.nginx_worker_processes = 4
configuration.nginx_worker_connections = 10240
configuration.nginx_log_format = $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
configuration.nginx_log_format = $trusted_remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
configuration.nginx_sendfile = on
configuration.nginx_tcp_nopush = on
configuration.nginx_tcp_nodelay = on
......@@ -115,7 +115,13 @@ configuration.nginx_gzip_enabled = true
# configuring trusted proxies
# GitLab is behind a reverse proxy, so we don't want the IP address of the proxy
# to show up as the client address (because rack attack blacklists the lab
# frontend)
# frontend). frontend-caucase-url-list offers a more manageable approach than listing
# all frontend IPs.
configuration.nginx_real_ip_trusted_addresses =
configuration.nginx_real_ip_header = X-Forwarded-For
configuration.nginx_real_ip_recursive = off
# space-separated URLs of the caucase services providing the CAs used to validate the
# frontends' client certificates; a frontend is trusted if it presents a valid certificate.
configuration.frontend-caucase-url-list =
# rate limit for git project archive downloads, in requests per minute.
configuration.nginx_download_archive_rate_limit = 1
......@@ -748,6 +748,63 @@ copytruncate = true
# Nginx frontend #
######################
[frontend-caucase-ssl]
ca =
crl =
{% import "caucase" as caucase with context %}
{% set frontend_caucase_url_list = instance_parameter_dict.get('configuration.frontend-caucase-url-list', '').split() -%}
{% set frontend_caucase_url_hash_list = [] -%}
{% set frontend_caucase_updater_section_list = [] -%}
{% for frontend_caucase_url in frontend_caucase_url_list -%}
{% set hash = hashlib.md5(frontend_caucase_url.encode()).hexdigest() -%}
{% do frontend_caucase_url_hash_list.append(hash) -%}
{% set data_dir = '${nginx-ssl-dir:ssl}/%s' % hash -%}
{{ caucase.updater(
prefix='frontend-caucase-updater-%s' % hash,
buildout_bin_directory=buildout_bin_directory,
updater_path='${directory:service}/frontend-caucase-updater-%s' % hash,
url=frontend_caucase_url,
data_dir=data_dir,
ca_path='%s/ca.crt' % data_dir,
crl_path='%s/crl.pem' % data_dir,
on_renew='${frontend-caucase-updater-housekeeper:output}',
max_sleep=1,
openssl=openssl_bin,
)}}
{% do frontend_caucase_updater_section_list.append('frontend-caucase-updater-%s' % hash) -%}
{% endfor -%}
{% if frontend_caucase_url_hash_list %}
{% do frontend_caucase_updater_section_list.append('frontend-caucase-updater-housekeeper') -%}
[frontend-caucase-ssl]
ca = ${nginx-ssl-dir:ssl}/frontend_ca.crt
crl = ${nginx-ssl-dir:ssl}/frontend_crl.pem
[frontend-caucase-updater-housekeeper]
recipe = slapos.recipe.template
output = ${directory:bin}/frontend-caucase-updater-housekeeper
mode = 700
inline =
#!/bin/sh -e
# assemble all frontends CA and CRL in one file
CA=${frontend-caucase-ssl:ca}
:> $CA.tmp
CRL=${frontend-caucase-ssl:crl}
:> $CRL.tmp
{% for hash in frontend_caucase_url_hash_list %}
{% set data_dir = '${nginx-ssl-dir:ssl}/%s' % hash %}
echo "# {{ data_dir }}/ca.crt" >> $CA.tmp
cat "{{ data_dir }}/ca.crt" >> $CA.tmp
echo "# {{ data_dir }}/crl.pem" >> $CRL.tmp
cat "{{ data_dir }}/crl.pem" >> $CRL.tmp
{% endfor %}
mv $CA.tmp $CA
mv $CRL.tmp $CRL
kill -HUP $(cat ${directory:run}/nginx.pid)
{% endif %}
# srv/nginx/ prefix + etc/ log/ ...
[nginx-dir]
recipe = slapos.cookbook:mkdirectory
......@@ -787,6 +844,9 @@ ssl = ${nginx-ssl-dir:ssl}
cert_file = ${nginx-generate-certificate:cert_file}
key_file = ${nginx-generate-certificate:key_file}
client_ca_file = ${frontend-caucase-ssl:ca}
client_crl_file = ${frontend-caucase-ssl:crl}
[nginx-symlinks]
# (nginx wants <prefix>/logs to be there from start - else it issues alarm to the log)
......@@ -801,6 +861,9 @@ depend =
${nginx-symlinks:recipe}
${promise-nginx:recipe}
${logrotate-entry-nginx:recipe}
{% for section in frontend_caucase_updater_section_list %}
{{ '${' ~ section ~ ':recipe}' }}
{% endfor %}
[promise-nginx]
......
......@@ -41,8 +41,12 @@ configuration.icp_license =
recipe = slapos.recipe.template:jinja2
mode = 0644
output= $${buildout:directory}/$${:_buildout_section_name_}
extensions = jinja2.ext.do
import-list =
rawfile caucase ${caucase-jinja2-library:target}
context =
import os os
import hashlib hashlib
import pwd pwd
key bin_directory buildout:bin-directory
......
......@@ -3,6 +3,7 @@
extends =
buildout.hash.cfg
../../stack/slapos.cfg
../../stack/caucase/buildout.cfg
../../stack/nodejs.cfg
../../stack/monitor/buildout.cfg
../../component/libgit2/buildout.cfg
......@@ -54,6 +55,7 @@ parts =
slapos-cookbook
eggs
caucase-eggs
bash
curl
......
......@@ -37,6 +37,8 @@ upstream gitlab-workhorse {
server unix:{{ gitlab_workhorse.socket }};
}
limit_req_zone $trusted_remote_addr zone=downloadarchive:10m rate={{ cfg('nginx_download_archive_rate_limit') }}r/m;
{# not needed for us - the frontend can do the redirection and also
gitlab/nginx speaks HSTS on https port so when we access https port via http
protocol, it gets redirected to https
......@@ -76,11 +78,12 @@ server {
## https://raymii.org/s/tutorials/Strong_SSL_Security_On_nginx.html & https://cipherli.st/
ssl_certificate {{ nginx.cert_file }};
ssl_certificate_key {{ nginx.key_file }};
{# we don't need - most root CA will be included by default
<% if @ssl_client_certificate %>
ssl_client_certificate <%= @ssl_client_certificate%>;
<% end %>
#}
{% if nginx.client_ca_file %}
ssl_client_certificate {{ nginx.client_ca_file }};
ssl_crl {{ nginx.client_crl_file }};
ssl_verify_client optional_no_ca;
{% endif %}
# GitLab needs backwards compatible ciphers to retain compatibility with Java IDEs
# NOTE(slapos) ^^^ is not relevant for us - we are behind frontend and clients
......@@ -110,6 +113,18 @@ server {
set_real_ip_from {{ trusted_address }};
{% endfor %}
## SlapOS: For Real IP, instead of trusting the frontends through their IP addresses,
## we expect the frontends to use a client certificate and we trust frontends only if
## we can validate that certificate.
set $trusted_remote_addr $remote_addr;
{% if nginx.client_ca_file %}
set_real_ip_from 0.0.0.0/0;
set_real_ip_from ::/0;
if ($ssl_client_verify != SUCCESS) {
set $trusted_remote_addr $realip_remote_addr;
}
{% endif %}
## HSTS Config
## https://www.nginx.com/blog/http-strict-transport-security-hsts-and-nginx/
{% if int(cfg("nginx_hsts_max_age")) > 0 -%}
......@@ -148,6 +163,8 @@ server {
proxy_http_version 1.1;
limit_req_status 429;
{# we do not support relative URL - path is always "/" #}
{% set path = "/" %}
......@@ -163,7 +180,21 @@ server {
{% if cfg_https %}
proxy_set_header X-Forwarded-Ssl on;
{% endif %}
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-For $trusted_remote_addr;
proxy_set_header X-Forwarded-Proto {{ "https" if cfg_https else "http" }};
proxy_pass http://gitlab-workhorse;
}
## archive downloads are rate limited.
location ~ /[^/]+/[^/]+/-/archive/.* {
limit_req zone=downloadarchive;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
{% if cfg_https %}
proxy_set_header X-Forwarded-Ssl on;
{% endif %}
proxy_set_header X-Forwarded-For $trusted_remote_addr;
proxy_set_header X-Forwarded-Proto {{ "https" if cfg_https else "http" }};
proxy_pass http://gitlab-workhorse;
......@@ -188,7 +219,7 @@ server {
{% if cfg_https %}
proxy_set_header X-Forwarded-Ssl on;
{% endif %}
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-For $trusted_remote_addr;
proxy_set_header X-Forwarded-Proto {{ "https" if cfg_https else "http" }};
proxy_pass http://gitlab-workhorse;
......
......@@ -26,65 +26,206 @@
##############################################################################
import os
import requests
import functools
import urllib.parse
import subprocess
import time
from typing import Optional, Tuple
import bs4
from urllib.parse import urljoin
import requests
from slapos.testing.testcase import makeModuleSetUpAndTestCaseClass
from slapos.testing.caucase import CaucaseCertificate, CaucaseService
setUpModule, SlapOSInstanceTestCase = makeModuleSetUpAndTestCaseClass(
os.path.abspath(
os.path.join(os.path.dirname(__file__), '..', 'software.cfg')))
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "software.cfg"))
)
class TestGitlab(SlapOSInstanceTestCase):
__partition_reference__ = 'G' # solve path too long for postgresql and unicorn
__partition_reference__ = "G" # solve path too long for postgresql and unicorn
instance_max_retry = 50 # puma takes time to be ready
@classmethod
def getInstanceSoftwareType(cls):
return 'gitlab'
return "gitlab"
@classmethod
def getInstanceParameterDict(cls):
return {'root-password': 'admin1234'}
frontend_caucase = cls.getManagedResource("frontend_caucase", CaucaseService)
certificate = cls.getManagedResource("client_certificate", CaucaseCertificate)
certificate.request("shared frontend", frontend_caucase)
return {
"root-password": "admin1234",
"frontend-caucase-url-list": frontend_caucase.url,
}
def setUp(self):
self.backend_url = self.computer_partition.getConnectionParameterDict(
)['backend_url']
self.backend_url = self.computer_partition.getConnectionParameterDict()[
"backend_url"
]
def test_http_get(self):
resp = requests.get(self.backend_url, verify=False)
self.assertTrue(
resp.status_code in [requests.codes.ok, requests.codes.found])
self.assertTrue(resp.status_code in [requests.codes.ok, requests.codes.found])
def test_rack_attack_sign_in_rate_limiting(self):
session = requests.session()
client_certificate = self.getManagedResource(
"client_certificate", CaucaseCertificate
)
session = requests.Session()
session.cert = (client_certificate.cert_file, client_certificate.key_file)
# Load the login page to get a CSRF token.
response = session.get(urljoin(self.backend_url, 'users/sign_in'), verify=False)
response = session.get(
urllib.parse.urljoin(self.backend_url, "users/sign_in"), verify=False
)
self.assertEqual(response.status_code, 200)
# Extract the CSRF token and param.
bsoup = bs4.BeautifulSoup(response.text, 'html.parser')
csrf_param = bsoup.find('meta', dict(name='csrf-param'))['content']
csrf_token = bsoup.find('meta', dict(name='csrf-token'))['content']
bsoup = bs4.BeautifulSoup(response.text, "html.parser")
csrf_param = bsoup.find("meta", dict(name="csrf-param"))["content"]
csrf_token = bsoup.find("meta", dict(name="csrf-token"))["content"]
request_data = {
'user[login]': 'test',
'user[password]': 'random',
csrf_param: csrf_token}
"user[login]": "test",
"user[password]": "random",
csrf_param: csrf_token,
}
sign_in = functools.partial(
session.post,
response.url,
data=request_data,
verify=False)
session.post, response.url, data=request_data, verify=False
)
for _ in range(10):
sign_in(headers={'X-Forwarded-For': '1.2.3.4'})
sign_in(headers={"X-Forwarded-For": "1.2.3.4"}).raise_for_status()
# after 10 authentication failures, this client is rate limited
self.assertEqual(sign_in(headers={'X-Forwarded-For': '1.2.3.4'}).status_code, 429)
self.assertEqual(sign_in(headers={"X-Forwarded-For": "1.2.3.4"}).status_code, 429)
# but other clients are not
self.assertNotEqual(sign_in(headers={'X-Forwarded-For': '5.6.7.8'}).status_code, 429)
self.assertNotEqual(
sign_in(headers={"X-Forwarded-For": "5.6.7.8"}).status_code, 429
)
def _get_client_ip_address_from_nginx_log(
    self, cert: Optional[Tuple[str, str]]
) -> str:
    """Send a tagged request and return the client address nginx logged for it.

    The request carries ``X-Forwarded-For: 1.2.3.4`` and the current test id
    in its query string, so the matching access-log line can be recognised.

    Args:
        cert: ``(cert_file, key_file)`` pair passed to ``requests`` as the
            client certificate, or ``None`` to connect without one.

    Returns:
        The first ``-``-separated field of the matching log line, stripped.

    Raises:
        requests.HTTPError: if the request gets an error status.
        RuntimeError: if no log line mentions the test id after 100 polls.
    """
    requests.get(
        urllib.parse.urljoin(
            self.backend_url,
            f"/users/sign_in?request_id={self.id()}",
        ),
        verify=False,
        cert=cert,
        headers={"X-Forwarded-For": "1.2.3.4"},
    ).raise_for_status()
    nginx_log_file = (
        self.computer_partition_root_path / "var" / "log" / "nginx" / "gitlab_access.log"
    )
    request_marker = self.id()
    last_log_line = None
    for _ in range(100):
        log_lines = nginx_log_file.read_text().splitlines()
        # Scan newest-first instead of only looking at the very last line:
        # another request may have been logged after ours, and the log may
        # still be empty if nginx has not flushed anything yet.
        for log_line in reversed(log_lines):
            if request_marker in log_line:
                return log_line.split("-")[0].strip()
        if log_lines:
            last_log_line = log_lines[-1]
        time.sleep(1)
    raise RuntimeError(f"Could not find {self.id()} in {last_log_line=}")
def test_client_ip_in_nginx_log_with_certificate(self):
    """With the managed client certificate, the forwarded address is logged.

    The helper sends ``X-Forwarded-For: 1.2.3.4``; that value is expected as
    the client address in the nginx access log.
    """
    certificate = self.getManagedResource("client_certificate", CaucaseCertificate)
    logged_address = self._get_client_ip_address_from_nginx_log(
        cert=(certificate.cert_file, certificate.key_file)
    )
    self.assertEqual(logged_address, "1.2.3.4")
def test_client_ip_in_nginx_log_without_certificate(self):
    """Without a client certificate, the forwarded address must not be logged.

    The helper still sends ``X-Forwarded-For: 1.2.3.4``; the logged client
    address is only required to differ from that value.
    """
    logged_address = self._get_client_ip_address_from_nginx_log(cert=None)
    self.assertNotEqual(logged_address, "1.2.3.4")
def test_client_ip_in_nginx_log_with_not_verified_certificate(self):
    """A certificate from an unrelated caucase does not make the client trusted.

    The certificate is requested from a second caucase service created by
    this test, so the forwarded ``1.2.3.4`` address must not be logged.
    """
    unrelated_caucase = self.getManagedResource(
        "another_unrelated_caucase", CaucaseService
    )
    untrusted_certificate = self.getManagedResource(
        "unknown_client_certificate", CaucaseCertificate
    )
    untrusted_certificate.request("unknown client certificate", unrelated_caucase)

    cert_and_key = (untrusted_certificate.cert_file, untrusted_certificate.key_file)
    logged_address = self._get_client_ip_address_from_nginx_log(cert=cert_and_key)
    self.assertNotEqual(logged_address, "1.2.3.4")
def test_download_archive_rate_limiting(self):
    """Check that repeated archive downloads from one client address get 429.

    Steps:
      1. create a personal access token with a known value through
         ``gitlab-rails runner``;
      2. create a public project ``sample-test`` and commit one file to
         ``main`` through the API;
      3. for each archive format, download once successfully and check that
         an immediate second download from the same forwarded address is
         answered with 429;
      4. check that an archive request for an unknown ref gets 404.
    """
    # Single definition of the token shared by the rails script and the API calls.
    access_token = "SLurtnxPscPsU-SDm4oN"
    token_headers = {"PRIVATE-TOKEN": access_token}

    gitlab_rails_bin = self.computer_partition_root_path / "bin" / "gitlab-rails"
    # User 1 is presumably the root account (the project is later fetched
    # under /root/) - confirm against the instance setup.
    subprocess.check_call(
        (
            gitlab_rails_bin,
            "runner",
            "user = User.find(1);"
            "token = user.personal_access_tokens.create(scopes: [:api], name: 'Root token');"
            f"token.set_token('{access_token}');"
            "token.save!",
        ),
    )

    client_certificate = self.getManagedResource("client_certificate", CaucaseCertificate)
    with requests.Session() as session:
        # Connect with the managed client certificate, as the other tests do
        # when they want X-Forwarded-For to be used as the client address.
        session.cert = (client_certificate.cert_file, client_certificate.key_file)
        session.verify = False

        ret = session.post(
            urllib.parse.urljoin(self.backend_url, "/api/v4/projects"),
            data={
                "name": "sample-test",
                "visibility": "public",
            },
            headers=token_headers,
        )
        ret.raise_for_status()
        project_id = ret.json()["id"]

        session.post(
            urllib.parse.urljoin(
                self.backend_url, f"/api/v4/projects/{project_id}/repository/commits"
            ),
            json={
                "branch": "main",
                "commit_message": "Add a file to test download archive",
                "actions": [
                    {"action": "create", "file_path": "README.md", "content": "file content"}
                ],
            },
            headers=token_headers,
        ).raise_for_status()

        for i, ext in enumerate(("zip", "tar.gz", "tar.bz2", "tar")):
            # A distinct forwarded address per format, so that each format
            # starts with a fresh rate-limit bucket.
            headers = {"X-Forwarded-For": f"{i}.{i}.{i}.{i}"}
            get = functools.partial(
                session.get,
                urllib.parse.urljoin(
                    self.backend_url,
                    f"/root/sample-test/-/archive/main/sample-test-main.{ext}",
                ),
                headers=headers,
            )
            with self.subTest(ext):
                get().raise_for_status()
                self.assertEqual(get().status_code, 429)

        # A request for a ref that does not exist is expected to return 404.
        self.assertEqual(
            session.get(
                urllib.parse.urljoin(
                    self.backend_url,
                    "/root/sample-test/-/archive/invalidref/sample-test-invalidref.zip",
                ),
            ).status_code,
            404,
        )
......@@ -449,7 +449,7 @@ class TestTheiaResiliencePeertube(test_resiliency.TestTheiaResilience):
instance_type, 'srv', 'runner', 'instance', partition, *paths)
class TestTheiaResilienceGitlab(test_resiliency.TestTheiaResilience):
test_instance_max_retries = 12
test_instance_max_retries = 50 # puma takes time to be ready
backup_max_tries = 480
backup_wait_interval = 60
_connection_parameters_regex = re.compile(r"{.*}", re.DOTALL)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment