[feat] Extend Download API to use an alternate URL as fallback

The alternate URL is tried when the main URL fails with an HTTPError. In either case, the main URL is the one used for both downloading from and uploading to the networkcache. This will be exposed by the slapos.recipe.build:download* and slapos.recipe.cmmi recipes.

[feat] Extend Download API to use an alternate URL as fallback
The alternate URL is tried when the main URL fails with an HTTPError. In either case, the main URL is the one used for both downloading from and uploading to the networkcache. This will be exposed by the slapos.recipe.build:download* and slapos.recipe.cmmi recipes.
76466073 · Julien Muchembled · Xavier Thompson · 07da49ef · 76466073 · 76466073
Commit 76466073 authored Sep 20, 2021 by Julien Muchembled Committed by Xavier Thompson Mar 13, 2024
3 changed files
--- a/src/zc/buildout/download.py
+++ b/src/zc/buildout/download.py
@@ -121,7 +121,7 @@ class Download(object):
        """
        return self.download_cached if self.cache else self.download

-    def download_cached(self, url, md5sum=None, path=None):
+    def download_cached(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL using the cache.

        This method assumes that the cache has been configured.
@@ -154,7 +154,7 @@ class Download(object):
            # Don't download directly to cached_path to minimize
            # the probability to alter old data if download fails.
            try:
-                path, is_temp = self.download(url, md5sum, path)
+                path, is_temp = self.download(url, md5sum, path, alternate_url)
            except ChecksumError:
                raise
            except Exception:
@@ -177,11 +177,11 @@ class Download(object):
        else:
            self.logger.debug('Cache miss; will cache %s as %s' %
                              (url, cached_path))
-            self.download(url, md5sum, cached_path)
+            self.download(url, md5sum, cached_path, alternate_url)

        return locate_at(cached_path, path), False

-    def download(self, url, md5sum=None, path=None):
+    def download(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL to a given or temporary path.

        An online resource is always downloaded to a temporary file and moved
@@ -210,6 +210,7 @@ class Download(object):
                "Couldn't download %r in offline mode." % url)

        self.logger.info('Downloading %s' % url)
+        download_url = url
        tmp_path = path
        cleanup = True
        try:
@@ -223,10 +224,17 @@ class Download(object):
                tmp_path, url, self.logger,
                nc.get('signature-certificate-list'), md5sum):
                # Download from original url if not cached or md5sum doesn't match.
-                tmp_path, headers = urlretrieve(url, tmp_path)
+                try:
+                    tmp_path, headers = urlretrieve(url, tmp_path)
+                except HTTPError:
+                    if not alternate_url:
+                        raise
+                    self.logger.info('using alternate URL: %s', alternate_url)
+                    download_url = alternate_url
+                    tmp_path, headers = urlretrieve(alternate_url, tmp_path)
                if not check_md5sum(tmp_path, md5sum):
                    raise ChecksumError(
-                        'MD5 checksum mismatch downloading %r' % url)
+                        'MD5 checksum mismatch downloading %r' % download_url)
                # Upload the file to network cache.
                if nc.get('upload-cache-url') and nc.get('upload-dir-url'):
                    upload_network_cached(
@@ -242,7 +250,7 @@ class Download(object):
            cleanup = False
        except IOError as e:
            raise zc.buildout.UserError("Error downloading %s: %s"
-                                        % (url, e))
+                                        % (download_url, e))
        finally:
            if cleanup and tmp_path:
                remove(tmp_path)
@@ -302,8 +310,8 @@ def remove(path):
    if os.path.exists(path):
        os.remove(path)

-from zc.buildout.networkcache import download_network_cached, \
-                                     upload_network_cached
+from zc.buildout.networkcache import \
+  download_network_cached, upload_network_cached, HTTPError

 def locate_at(source, dest):
    if dest is None or realpath(dest) == realpath(source):

--- a/src/zc/buildout/tests/download.txt
+++ b/src/zc/buildout/tests/download.txt
@@ -63,6 +63,32 @@ When trying to access a file that doesn't exist, we'll get an exception:
 ... else: print_('woops')
 download error

+An alternate URL can be used when the main URL fails with an HTTPError.
+This is useful when a version of a resource can only be downloaded from a
+temporary URL as long as it is the latest version, and is then moved to a
+permanent place when a newer version is released. In such a case, when using
+a cache (in particular networkcache), it's important that the main URL (`url`)
+is always used as the cache key; `alternate_url` shall be the temporary URL.
+
+>>> path, is_temp = download(server_url+'not-there',
+...                          alternate_url=server_url+'foo.txt')
+>>> cat(path)
+This is a foo text.
+>>> is_temp
+True
+>>> remove(path)
+
+The main URL is tried first:
+
+>>> write(server_data, 'other.txt', 'This is some other text.')
+>>> path, is_temp = download(server_url+'other.txt',
+...                          alternate_url=server_url+'foo.txt')
+>>> cat(path)
+This is some other text.
+>>> is_temp
+True
+>>> remove(path)
+
 Downloading a local file doesn't produce a temporary file but simply returns
 the local file itself:


--- a/src/zc/buildout/tests/extends-cache.txt.disabled
+++ b/src/zc/buildout/tests/extends-cache.txt.disabled
@@ -492,9 +492,9 @@ a better solution would re-use the logging already done by the utility.)

 >>> import zc.buildout
 >>> old_download = zc.buildout.download.Download.download
->>> def wrapper_download(self, url, md5sum=None, path=None):
+>>> def wrapper_download(self, url, *args, **kw):
 ...   print_("The URL %s was downloaded." % url)
-...   return old_download(url, md5sum, path)
+...   return old_download(url, *args, **kw)
 >>> zc.buildout.download.Download.download = wrapper_download

 >>> zc.buildout.buildout.main([])