From 9722568361e3747ccd998e81113534bb79d615a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Nowak?= <luke@nexedi.com>
Date: Mon, 8 Mar 2021 14:12:03 +0100
Subject: [PATCH] kvm: Implement whitelist firewall

Sources of domains and IPs are:

 * default hardcoded in template/whitelist-domains-default
 * /etc/resolv.conf
 * provided in the request
 * provided in the special downloadable repository

Then they are parsed with dnsresolver and the .slapos-whitelist-firewall file
is produced with the list of IPs to be whitelisted.

This allows the slapos.core whitelistfirewall manager to lock down the
partition to only the whitelisted IPs.
---
 software/kvm/buildout.hash.cfg                |  14 ++-
 .../instance-kvm-cluster-input-schema.json    |   6 +
 ...e-kvm-cluster-simplified-input-schema.json |   6 +
 .../kvm/instance-kvm-cluster.cfg.jinja2.in    |   2 +-
 software/kvm/instance-kvm-input-schema.json   |   6 +
 software/kvm/instance-kvm.cfg.jinja2          |  59 +++++++++-
 software/kvm/instance.cfg.in                  |   3 +
 software/kvm/software.cfg                     |   8 ++
 .../kvm/template/whitelist-domains-default    |  13 +++
 .../whitelist-firewall-download-controller.py |  38 ++++++
 software/kvm/test/test.py                     | 110 +++++++++++++++++-
 11 files changed, 258 insertions(+), 7 deletions(-)
 create mode 100644 software/kvm/template/whitelist-domains-default
 create mode 100644 software/kvm/template/whitelist-firewall-download-controller.py

diff --git a/software/kvm/buildout.hash.cfg b/software/kvm/buildout.hash.cfg
index b9e3ae594..c3271fbd5 100644
--- a/software/kvm/buildout.hash.cfg
+++ b/software/kvm/buildout.hash.cfg
@@ -15,15 +15,15 @@
 
 [template]
 filename = instance.cfg.in
-md5sum = e6d5c7bb627b4f1d3e7c99721b7c58fe
+md5sum = 399b398a8eabfa6126d2a521dc779f9b
 
 [template-kvm]
 filename = instance-kvm.cfg.jinja2
-md5sum = 89796d6d6a25f694291f9b45181830ad
+md5sum = 704b6ac6bf42837bcd8f4582c5a746c0
 
 [template-kvm-cluster]
 filename = instance-kvm-cluster.cfg.jinja2.in
-md5sum = 8a6c5555efd63ac7d471b8fdabb69f7e
+md5sum = 80b9b70ba1ccbc09deb8f9cad60f352c
 
 [template-kvm-resilient]
 filename = instance-kvm-resilient.cfg.jinja2
@@ -88,3 +88,11 @@ md5sum = 9c67058edcc4edae0b57956c0932a9fc
 [image-download-config-creator]
 _update_hash_filename_ = template/image-download-config-creator.py
 md5sum = 54261e418ab9860efe73efd514c4d47f
+
+[whitelist-firewall-download-controller]
+_update_hash_filename_ = template/whitelist-firewall-download-controller.py
+md5sum = bc64e29546833817636261d1b28aa6dc
+
+[whitelist-domains-default]
+_update_hash_filename_ = template/whitelist-domains-default
+md5sum = e9d40162ba77472775256637a2617d14
diff --git a/software/kvm/instance-kvm-cluster-input-schema.json b/software/kvm/instance-kvm-cluster-input-schema.json
index db51f9f51..27bf17456 100644
--- a/software/kvm/instance-kvm-cluster-input-schema.json
+++ b/software/kvm/instance-kvm-cluster-input-schema.json
@@ -556,6 +556,12 @@
                   "title": "FreeBSD 12.1 RELEASE bootonly x86_64"
                 }
               ]
+            },
+            "whitelist-domains": {
+              "title": "Whitelist domains",
+              "description": "List of whitelisted domain names to be accessed from the VM. They will be resolved to IPs depending on where the VM ends up. IPs can be used too.",
+              "type": "string",
+              "textarea": true
             }
           },
           "type": "object"
diff --git a/software/kvm/instance-kvm-cluster-simplified-input-schema.json b/software/kvm/instance-kvm-cluster-simplified-input-schema.json
index 941d86d10..96c59fa44 100644
--- a/software/kvm/instance-kvm-cluster-simplified-input-schema.json
+++ b/software/kvm/instance-kvm-cluster-simplified-input-schema.json
@@ -107,6 +107,12 @@
                   "title": "FreeBSD 12.1 RELEASE bootonly x86_64"
                 }
               ]
+            },
+            "whitelist-domains": {
+              "title": "Whitelist domains",
+              "description": "List of whitelisted domain names to be accessed from the VM. They will be resolved to IPs depending on where the VM ends up. IPs can be used too.",
+              "type": "string",
+              "textarea": true
             }
           },
           "type": "object"
diff --git a/software/kvm/instance-kvm-cluster.cfg.jinja2.in b/software/kvm/instance-kvm-cluster.cfg.jinja2.in
index 589d731d1..6ebd638f7 100644
--- a/software/kvm/instance-kvm-cluster.cfg.jinja2.in
+++ b/software/kvm/instance-kvm-cluster.cfg.jinja2.in
@@ -135,7 +135,7 @@ config-document-host = ${apache-conf:ip}
 config-document-port = ${apache-conf:port}
 config-document-path = ${hash-code:passwd}
 config-keyboard-layout-language = {{ dumps(kvm_parameter_dict.get('keyboard-layout-language', 'fr')) }}
-{%- for k in ['boot-image-url-list', 'boot-image-url-select'] %}
+{%- for k in ['boot-image-url-list', 'boot-image-url-select', 'whitelist-domains'] %}
 {#-   play nice - use parameter only if present #}
 {%-   if k in kvm_parameter_dict %}
 {#-     play safe - dumps value #}
diff --git a/software/kvm/instance-kvm-input-schema.json b/software/kvm/instance-kvm-input-schema.json
index 8cfc6bb8d..5a02a0ebe 100644
--- a/software/kvm/instance-kvm-input-schema.json
+++ b/software/kvm/instance-kvm-input-schema.json
@@ -419,6 +419,12 @@
           "title": "FreeBSD 12.1 RELEASE bootonly x86_64"
         }
       ]
+    },
+    "whitelist-domains": {
+      "title": "Whitelist domains",
+      "description": "List of whitelisted domain names to be accessed from the VM. They will be resolved to IPs depending on where the VM ends up. IPs can be used too.",
+      "type": "string",
+      "textarea": true
     }
   }
 }
diff --git a/software/kvm/instance-kvm.cfg.jinja2 b/software/kvm/instance-kvm.cfg.jinja2
index 758e00cf4..78712083d 100644
--- a/software/kvm/instance-kvm.cfg.jinja2
+++ b/software/kvm/instance-kvm.cfg.jinja2
@@ -16,6 +16,7 @@
 {% set instance_type = slapparameter_dict.get('type', 'standalone') -%}
 {% set nat_rule_list = slapparameter_dict.get('nat-rules', '22 80 443') -%}
 {% set disk_device_path = slapparameter_dict.get('disk-device-path', None) -%}
+{% set whitelist_domains = slapparameter_dict.get('whitelist-domains', '') -%}
 {% set boot_image_url_list_enabled = 'boot-image-url-list' in slapparameter_dict %}
 {% set boot_image_url_select_enabled = 'boot-image-url-select' in slapparameter_dict %}
 {% set cpu_max_count = dumps(slapparameter_dict.get('cpu-max-count', int(slapparameter_dict.get('cpu-count', 1)) + 1)) %}
@@ -972,7 +973,7 @@ keyboard-layout-language = fr
 {% set key_list =  v.split('\n') -%}
 {{ k }} =
   {{ key_list | join('\n  ') }}
-{% elif k in ['boot-image-url-list', 'boot-image-url-select'] %}
+{% elif k in ['boot-image-url-list', 'boot-image-url-select', 'whitelist-domains'] %}
 {# needs to decorate possibly multiline or maybe unsafe value #}
 {{ k }} = {{ dumps(v) }}
 {% else -%}
@@ -1038,8 +1039,62 @@ command-line =
 
 {% endif -%}
 
+{%   do part_list.append('whitelist-firewall') -%}
+[whitelist-firewall]
+recipe = slapos.cookbook:wrapper
+hash-existing-files = ${buildout:directory}/software_release/buildout.cfg
+wrapper-path = ${directory:scripts}/${:_buildout_section_name_}
+command-line =
+  {{ dnsresolver_executable }}
+  --style list
+  --output ${:output}
+  ${:source}
+
+source =
+  ${whitelist-domains-request:rendered}
+  {{ whitelist_domains_default }}
+  ${whitelist-domains-resolv.conf:output}
+  ${whitelist-domains-download:output}
+output = ${buildout:directory}/.slapos-whitelist-firewall
+
+[whitelist-firewall-directory]
+recipe = plone.recipe.command
+location = ${buildout:parts-directory}/whitelist-firewall
+command = mkdir -p ${:location}
+update-command = ${:command}
+
+[whitelist-domains-request]
+recipe = slapos.recipe.template:jinja2
+template = inline:
+{%- raw %}
+  {%- for domain in whitelist_domains.split() %}
+  {{ domain }}
+  {%- endfor %}
+{% endraw -%}
+rendered = ${whitelist-firewall-directory:location}/${:_buildout_section_name_}.txt
+whitelist-domains = {{ dumps(whitelist_domains) }}
+extensions = jinja2.ext.do
+context =
+  key whitelist_domains :whitelist-domains
+
+[whitelist-domains-resolv.conf]
+recipe = plone.recipe.command
+output = ${whitelist-firewall-directory:location}/${:_buildout_section_name_}.txt
+update-command = ${:command}
+command =
+  egrep ^nameserver /etc/resolv.conf  | cut -d ' ' -f 2 > ${:output}
+
+[whitelist-domains-download]
+recipe = slapos.cookbook:wrapper
+hash-existing-files = ${buildout:directory}/software_release/buildout.cfg
+wrapper-path = ${directory:scripts}/${:_buildout_section_name_}
+output = ${whitelist-firewall-directory:location}/${:_buildout_section_name_}.txt
+interval = 3600
+command-line = {{ python_executable }} {{ whitelist_firewall_download_controller }} {{ curl_executable_location }} 3600 ${:output} ${:url}
+
+url = https://stream.nxdcdn.com/rapidspace-whitelist-domains
+
 [instance-kvm-parts]
-# Expose parts for easy addition in profiles which extend this one like resilient
 parts =
   certificate-authority
   certificate-authority-service
diff --git a/software/kvm/instance.cfg.in b/software/kvm/instance.cfg.in
index d961c5309..5095d2348 100644
--- a/software/kvm/instance.cfg.in
+++ b/software/kvm/instance.cfg.in
@@ -80,9 +80,12 @@ extra-context =
   raw ansible_promise_tpl ${template-ansible-promise:location}/${template-ansible-promise:filename}
   raw curl_executable_location ${curl:location}/bin/curl
   raw dash_executable_location ${dash:location}/bin/dash
+  raw dnsresolver_executable ${buildout:bin-directory}/dnsresolver
   raw dcron_executable_location ${dcron:location}/sbin/crond
   raw debian_amd64_netinst_location ${debian-amd64-netinst.iso:location}/${debian-amd64-netinst.iso:filename}
   raw file_download_script ${file-download-script:location}/${file-download-script:filename}
+  raw whitelist_domains_default ${whitelist-domains-default:location}/${whitelist-domains-default:filename}
+  raw whitelist_firewall_download_controller ${whitelist-firewall-download-controller:target}
   raw image_download_controller ${image-download-controller:target}
   raw image_download_config_creator ${image-download-config-creator:target}
   raw logrotate_cfg ${template-logrotate-base:rendered}
diff --git a/software/kvm/software.cfg b/software/kvm/software.cfg
index 7d604b6d4..6f7ad8d3c 100644
--- a/software/kvm/software.cfg
+++ b/software/kvm/software.cfg
@@ -182,6 +182,10 @@ path = download_file.in
 filename = download_file
 on-update = true
 
+[whitelist-domains-default]
+<= download-template-base
+filename = whitelist-domains-default
+
 [template-httpd]
 recipe = slapos.recipe.template:jinja2
 template = ${:_profile_base_location_}/instance-kvm-http.cfg.in
@@ -199,6 +203,10 @@ mode = 640
 [image-download-config-creator]
 <= image-download-controller
 
+[whitelist-firewall-download-controller]
+<= image-download-controller
+
+
 [versions]
 websockify = 0.9.0
 
diff --git a/software/kvm/template/whitelist-domains-default b/software/kvm/template/whitelist-domains-default
new file mode 100644
index 000000000..475bb580f
--- /dev/null
+++ b/software/kvm/template/whitelist-domains-default
@@ -0,0 +1,13 @@
+# Minimal whitelisted domains needed to instantiate the instance
+# Does not guarantee good usage of the guest VM itself
+# The full list shall end up in whitelist-domains-download:url
+# shcache.nxdcdn.com is default source for a lot of operations
+shacache.nxdcdn.com
+# stream.nxdcdn.com is needed by partition itself
+stream.nxdcdn.com
+# partition has to access default SlapOS Master
+slap.vifib.com
+slapos.vifib.com
+# Partition needs access to SlapOS Master related resources
+hnode.cdn.vifib.com
+node.cdn.vifib.com
diff --git a/software/kvm/template/whitelist-firewall-download-controller.py b/software/kvm/template/whitelist-firewall-download-controller.py
new file mode 100644
index 000000000..c575beb3e
--- /dev/null
+++ b/software/kvm/template/whitelist-firewall-download-controller.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+import sys
+import time
+import logging
+
+
+# Note: Assuring only one running instance is not done, as this script is only
+#       run from supervisord, which does it already
+if __name__ == "__main__":
+  curl, sleep, output, url = sys.argv[1:]
+  sleep = int(sleep)
+  tmp_output = output + '.tmp'
+
+  logging.basicConfig(
+    format='%%(asctime)s [%%(levelname)s] %s : %%(message)s' % (url,),
+    level=logging.DEBUG)
+  logging.info('Redownloading each %is', sleep)
+  while True:
+    logging.info('Fetching')
+    try:
+      subprocess.check_output([
+        curl,
+        '--location',  # follow redirects
+        '--no-progress-meter',  # do not tell too much
+        '--max-time', '600',  # 10 minutes is maximum
+        '--fail',  # fail in case of wrong HTTP code
+        '--output', tmp_output, url],
+        stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as e:
+      logging.error('Problem while downloading: %r', e.output.strip())
+    if os.path.exists(tmp_output):
+      logging.info('Stored output')
+      os.rename(tmp_output, output)
+    logging.info('Sleeping for %is', sleep)
+    time.sleep(sleep)
diff --git a/software/kvm/test/test.py b/software/kvm/test/test.py
index cae2ae089..31dac0271 100644
--- a/software/kvm/test/test.py
+++ b/software/kvm/test/test.py
@@ -168,7 +168,9 @@ i0:kvm-{hash}-on-watch RUNNING
 i0:kvm_controller EXITED
 i0:monitor-httpd-{hash}-on-watch RUNNING
 i0:monitor-httpd-graceful EXITED
-i0:websockify-{hash}-on-watch RUNNING""",
+i0:websockify-{hash}-on-watch RUNNING
+i0:whitelist-domains-download-{hash} RUNNING
+i0:whitelist-firewall-{hash} RUNNING""",
       self.getProcessInfo()
     )
 
@@ -496,6 +498,8 @@ ir2:resilient_sshkeys_authority-on-watch RUNNING
 ir2:sshd-graceful EXITED
 ir2:sshd-on-watch RUNNING
 ir2:websockify-{hash}-on-watch RUNNING
+ir2:whitelist-domains-download-{hash} RUNNING
+ir2:whitelist-firewall-{hash} RUNNING
 ir3:bootstrap-monitor EXITED
 ir3:certificate_authority-{hash}-on-watch RUNNING
 ir3:crond-{hash}-on-watch RUNNING
@@ -1170,3 +1174,107 @@ class TestNatRulesKvmCluster(InstanceTestCase):
 class TestNatRulesKvmClusterComplex(TestNatRulesKvmCluster):
   __partition_reference__ = 'nrkcc'
   nat_rules = ["100", "200 300"]
+
+
+@skipUnlessKvm
+class TestWhitelistFirewall(InstanceTestCase):
+  """Check the .slapos-whitelist-firewall file of the KVM partition."""
+  __partition_reference__ = 'wf'
+  kvm_instance_partition_reference = 'wf0'
+
+  def test(self):
+    """The file is a JSON list holding resolv.conf nameservers and more."""
+    slapos_whitelist_firewall = os.path.join(
+      self.slap.instance_directory, self.kvm_instance_partition_reference,
+      '.slapos-whitelist-firewall')
+    self.assertTrue(os.path.exists(slapos_whitelist_firewall))
+    with open(slapos_whitelist_firewall, 'rb') as fh:
+      # file is opened in binary mode: bytes are decoded, not encoded
+      content = fh.read().decode('utf-8')
+    try:
+      self.content_json = json.loads(content)
+    except ValueError:
+      self.fail('Failed to parse json of %s' % (content,))
+    self.assertIsInstance(self.content_json, list)
+    # each nameserver of /etc/resolv.conf has to be whitelisted
+    with open('/etc/resolv.conf', 'rb') as fh:
+      resolv_conf_ip_list = list(set(
+        line.split()[1] for line in fh.read().decode('utf-8').splitlines()
+        if line.startswith('nameserver')))
+    self.assertNotEqual([], resolv_conf_ip_list)
+    self.assertTrue(all([q in self.content_json for q in resolv_conf_ip_list]))
+    # there is something more than just the nameservers
+    self.assertGreater(len(self.content_json), len(resolv_conf_ip_list))
+
+
+@skipUnlessKvm
+class TestWhitelistFirewallRequest(TestWhitelistFirewall):
+  whitelist_domains = '2.2.2.2 3.3.3.3\n4.4.4.4'  # space and newline separated
+  @classmethod
+  def getInstanceParameterDict(cls):
+    return {  # passes the entries as the 'whitelist-domains' parameter
+      'whitelist-domains': cls.whitelist_domains,
+    }
+
+  def test(self):
+    super(TestWhitelistFirewallRequest, self).test()  # sets self.content_json
+    self.assertIn('2.2.2.2', self.content_json)  # each requested entry is listed
+    self.assertIn('3.3.3.3', self.content_json)
+    self.assertIn('4.4.4.4', self.content_json)
+
+
+@skipUnlessKvm
+class TestWhitelistFirewallResilient(TestWhitelistFirewall):
+  kvm_instance_partition_reference = 'wf2'  # partition inspected by test()
+
+  @classmethod
+  def getInstanceSoftwareType(cls):
+    return 'kvm-resilient'  # inherited checks run on the resilient type
+
+
+@skipUnlessKvm
+class TestWhitelistFirewallRequestResilient(TestWhitelistFirewallRequest):
+  kvm_instance_partition_reference = 'wf2'  # partition inspected by test()
+
+  @classmethod
+  def getInstanceSoftwareType(cls):
+    return 'kvm-resilient'  # inherited request checks on the resilient type
+
+
+@skipUnlessKvm
+class TestWhitelistFirewallCluster(TestWhitelistFirewall):
+  kvm_instance_partition_reference = 'wf1'  # partition inspected by test()
+
+  @classmethod
+  def getInstanceSoftwareType(cls):
+    return 'kvm-cluster'  # inherited checks run on the cluster type
+
+  @classmethod
+  def getInstanceParameterDict(cls):
+    return {'_': json.dumps({  # parameters are JSON-serialised under '_'
+      "kvm-partition-dict": {
+        "KVM0": {  # single KVM of the cluster, without whitelist-domains
+            "disable-ansible-promise": True
+        }
+      }
+    })}
+
+
+@skipUnlessKvm
+class TestWhitelistFirewallRequestCluster(TestWhitelistFirewallRequest):
+  kvm_instance_partition_reference = 'wf1'  # partition inspected by test()
+
+  @classmethod
+  def getInstanceSoftwareType(cls):
+    return 'kvm-cluster'  # inherited request checks on the cluster type
+
+  @classmethod
+  def getInstanceParameterDict(cls):
+    return {'_': json.dumps({  # parameters are JSON-serialised under '_'
+      "kvm-partition-dict": {
+        "KVM0": {
+            "whitelist-domains": cls.whitelist_domains,  # set per KVM entry
+            "disable-ansible-promise": True
+        }
+      }
+    })}
-- 
2.30.9