Commit 4a1d1520, authored by Julien Muchembled and committed by Xavier Thompson

[feat] download: add support for slapos.libnetworkcache

When an alternate URL is specified as a fallback, the main URL is always
the one used for both downloading from and uploading to networkcache.
parent 67993514
......@@ -400,6 +400,9 @@ def _get_user_config():
return os.path.join(buildout_home, 'default.cfg')
networkcache_client = None
@commands
class Buildout(DictMixin):
......@@ -699,6 +702,19 @@ class Buildout(DictMixin):
os.chdir(options['directory'])
networkcache_section_name = options.get('networkcache-section')
if networkcache_section_name:
networkcache_section = self[networkcache_section_name]
try:
from slapos.libnetworkcache import NetworkcacheClient
global networkcache_client
networkcache_client = NetworkcacheClient(networkcache_section)
except ImportError:
pass
except Exception:
self._logger.exception(
"Failed to setup Networkcache. Continue without.")
def _buildout_path(self, name):
if '${' in name:
return name
......
......@@ -27,7 +27,7 @@ except ImportError:
# Python 2
from urlparse import urlparse
from urlparse import urlunparse
from urllib2 import Request, urlopen, HTTPError
from urllib2 import HTTPError, Request, urlopen
from zc.buildout.easy_install import realpath
from base64 import b64encode
......@@ -220,27 +220,31 @@ class Download(object):
if not path:
handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
os.close(handle)
self._download(url, tmp_path, md5sum, alternate_url)
cleanup = False
finally:
if cleanup and tmp_path:
remove(tmp_path)
return tmp_path, not path
def _download(self, url, path, md5sum=None, alternate_url=None):
download_url = url
try:
try:
tmp_path, headers = self.urlretrieve(url, tmp_path)
self.urlretrieve(url, path)
except HTTPError:
if not alternate_url:
raise
self.logger.info('using alternate URL: %s', alternate_url)
download_url = alternate_url
tmp_path, headers = self.urlretrieve(
alternate_url, tmp_path)
if not check_md5sum(tmp_path, md5sum):
raise ChecksumError(
'MD5 checksum mismatch downloading %r' % download_url)
cleanup = False
self.urlretrieve(alternate_url, path)
if not check_md5sum(path, md5sum):
raise ChecksumError('MD5 checksum mismatch downloading %r'
% download_url)
except IOError as e:
raise zc.buildout.UserError("Error downloading %s: %s"
% (download_url, e))
finally:
if cleanup and tmp_path:
remove(tmp_path)
return tmp_path, not path
def filename(self, url):
"""Determine a file name from a URL according to the configuration.
......@@ -269,29 +273,61 @@ class Download(object):
url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port)
def urlretrieve(self, url, tmp_path):
def _auth(self, url):
parsed_url = urlparse(url)
req = url
while parsed_url.scheme in ('http', 'https'): # not a loop
if parsed_url.scheme in ('http', 'https'):
auth_host = parsed_url.netloc.rsplit('@', 1)
if len(auth_host) > 1:
auth = auth_host[0]
url = parsed_url._replace(netloc=auth_host[1]).geturl()
else:
return (auth_host[0],
parsed_url._replace(netloc=auth_host[1]).geturl())
auth = netrc.authenticators(parsed_url.hostname)
if not auth:
break
auth = '{0}:{2}'.format(*auth)
req = Request(url)
if auth:
return '{0}:{2}'.format(*auth), url
def urlretrieve(self, url, tmp_path):
auth = self._auth(url)
if auth:
req = Request(auth[1])
req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth))))
break
"Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
else:
req = url
with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst)
return tmp_path, src.info()
# Redefinition of Download that subclasses the previous definition of the same
# name and overrides _download so that the network cache is consulted before
# the regular download, and fed after it.
# NOTE(review): indentation is not visible in this view, so the comments below
# describe the statements in order without asserting their exact nesting.
class Download(Download):
# Fetch `url` into the file at `path`.
#   url           -- main URL; also the only URL used to build the cache key
#   path          -- destination file, opened for writing in binary mode
#   md5sum        -- value handed to check_md5sum() to validate the result
#   alternate_url -- not used by the cache logic; only forwarded to the
#                    parent implementation
# Returns None. On a validated cache hit it returns before the parent
# implementation is called and before anything is uploaded.
def _download(self, url, path, md5sum=None, alternate_url=None):
# Imported at call time, so the value bound in the buildout module when the
# download happens is the one used; a falsy value skips all cache handling.
from .buildout import networkcache_client as nc
# Single-pass block: `while` is used only so that `break` can leave the
# cache-lookup section early.
while nc: # not a loop
# A truthy self._auth(url) result clears `nc`, which also disables the
# upload step at the end of this method.
if self._auth(url): # do not cache restricted data
nc = None
break
# Cache key: fixed prefix plus the hex MD5 digest of the encoded main URL.
key = 'file-urlmd5:' + md5(url.encode()).hexdigest()
# presumably tells whether a cache download should be attempted for this
# key -- semantics are defined by the networkcache client; TODO confirm
if not nc.tryDownload(key):
break
# NOTE(review): what entering/leaving `nc` does (including how exceptions are
# treated) is defined by the client class and is not visible here -- confirm.
with nc:
# First entry yielded by select() for this key and URL, or None if there is
# none.
entry = next(nc.select(key, {'url': url}), None)
if entry is None:
err = 'no matching entry'
else:
# Copy the cached content, looked up by the entry's 'sha512' value, to
# `path`.
with closing(nc.download(entry['sha512'])) as src, \
open(path, 'wb') as dst:
shutil.copyfileobj(src, dst)
# Validated cache hit: done, nothing to fetch from the URL or to upload.
if check_md5sum(path, md5sum):
return
err = 'MD5 checksum mismatch'
# Reached when the cache could not provide a valid file; `err` says why.
self.logger.info('Cannot download from network cache: %s', err)
break
# Regular download through the parent implementation, which also receives
# the alternate URL.
super(Download, self)._download(url, path, md5sum, alternate_url)
# Upload the downloaded file under the key derived from the main URL, with the
# main URL passed as `url`, regardless of alternate_url.
# presumably tryUpload() tells whether uploading is enabled -- TODO confirm
if nc and nc.tryUpload(key):
with nc, open(path, 'rb') as f:
nc.upload(f, key, url=url)
def check_md5sum(path, md5sum):
"""Tell whether the MD5 checksum of the file at path matches.
......
......@@ -67,8 +67,8 @@ An alternate URL can be used in case of HTTPError with the main one.
This is useful when a version of a resource can only be downloaded from a
temporary URL as long as it is the latest version, and this version is then moved
to a permanent place when a newer version is released. In such a case, when using
a cache, it's important that the main URL (`url`) is always used as cache key.
And `alternate_url` shall be the temporary URL.
a cache (in particular networkcache), it is important that the main URL (`url`)
is always used as the cache key, and that `alternate_url` is the temporary URL.
>>> path, is_temp = download(server_url+'not-there',
... alternate_url=server_url+'foo.txt')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment