download.py 8.85 KB
Newer Older
1 2
##############################################################################
#
# Copyright (c) 2009 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Buildout download infrastructure"""

try:
    from hashlib import md5
except ImportError:
    from md5 import new as md5
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37

try:
    # Python 3
    from urllib.request import FancyURLopener, URLopener, urlretrieve
    from urllib.parse import urlparse
    from urllib import request as urllib # for monkey patch below :(
except ImportError:
    # Python 2
    from urllib import FancyURLopener, URLopener, urlretrieve
    from urlparse import urlparse
    import urllib

class URLOpener(FancyURLopener):
    """FancyURLopener variant using URLopener's default HTTP error handler.

    NOTE(review): presumably this makes unhandled HTTP error responses raise
    (the download code in this module catches IOError around urlretrieve)
    instead of being treated as a successful download -- confirm against the
    urllib documentation.
    """
    # Replace the handler inherited from FancyURLopener with the one defined
    # on the plain URLopener base class.
    http_error_default = URLopener.http_error_default

# Install an instance as urllib's private module-level opener.
# NOTE(review): presumably this is the opener the module-level urlretrieve()
# picks up -- confirm; ``_urlopener`` is not a public API.
urllib._urlopener = URLOpener() # Ook! Monkey patch!


38 39 40 41
from zc.buildout.easy_install import realpath
import logging
import os
import os.path
42
import re
43
import shutil
Jim Fulton's avatar
Jim Fulton committed
44
import sys
45 46 47 48 49 50 51 52 53 54 55
import tempfile
import zc.buildout

class ChecksumError(zc.buildout.UserError):
    """Raised when a file's MD5 checksum does not match the expected one."""

class Download(object):
    """Configurable download utility.

    Handles the download cache and offline mode.

    Download(options=None, cache=-1, namespace=None,
             offline=-1, fallback=False, hash_name=False, logger=None)

    options: mapping of buildout options (e.g. a ``buildout`` config section)
    cache: path to the download cache (excluding namespaces); the default
        of -1 means "take it from the ``download-cache`` option", and a
        false value (e.g. None) disables caching
    namespace: namespace directory to use inside the cache
    offline: whether to operate in offline mode; the default of -1 means
        "derive it from the ``offline`` and ``install-from-cache`` options"
    fallback: whether to use the cache as a fallback (try downloading first)
    hash_name: whether to use a hash of the URL as cache file name
    logger: an optional logger to receive download-related log messages

    """

    def __init__(self, options=None, cache=-1, namespace=None,
                 offline=-1, fallback=False, hash_name=False, logger=None):
        # A fresh dict per call instead of a shared mutable default.
        if options is None:
            options = {}
        self.directory = options.get('directory', '')
        # -1 is the "not given" sentinel; None is a meaningful value that
        # switches the cache off.
        self.cache = cache
        if cache == -1:
            self.cache = options.get('download-cache')
        self.namespace = namespace
        self.offline = offline
        if offline == -1:
            self.offline = (options.get('offline') == 'true'
                            or options.get('install-from-cache') == 'true')
        self.fallback = fallback
        self.hash_name = hash_name
        self.logger = logger or logging.getLogger('zc.buildout')

    @property
    def download_cache(self):
        """Real path of the cache root, or None if no cache is configured.

        A relative cache path is resolved against ``self.directory``.

        """
        if self.cache is not None:
            return realpath(os.path.join(self.directory, self.cache))

    @property
    def cache_dir(self):
        """Cache directory including the namespace, or None without a cache."""
        if self.download_cache is not None:
            return os.path.join(self.download_cache, self.namespace or '')

    def __call__(self, url, md5sum=None, path=None):
        """Download a file according to the utility's configuration.

        url: URL to download
        md5sum: MD5 checksum to match
        path: where to place the downloaded file

        Returns a ``(path, is_temp)`` tuple: the path to the downloaded file
        and a flag telling whether it is a temporary file that the caller is
        responsible for cleaning up.

        """
        if self.cache:
            local_path, is_temp = self.download_cached(url, md5sum)
        else:
            local_path, is_temp = self.download(url, md5sum, path)

        return locate_at(local_path, path), is_temp

    def download_cached(self, url, md5sum=None):
        """Download a file from a URL using the cache.

        This method assumes that the cache has been configured. Optionally, it
        raises a ChecksumError if a cached copy of a file has an MD5 mismatch,
        but will not remove the copy in that case.

        Raises zc.buildout.UserError if the cache root directory does not
        exist; the namespace directory inside it is created on demand.

        Returns a ``(cached_path, is_temp)`` tuple.

        """
        if not os.path.exists(self.download_cache):
            raise zc.buildout.UserError(
                'The directory:\n'
                '%r\n'
                "to be used as a download cache doesn't exist.\n"
                % self.download_cache)
        cache_dir = self.cache_dir
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        cache_key = self.filename(url)
        cached_path = os.path.join(cache_dir, cache_key)

        self.logger.debug('Searching cache at %s' % cache_dir)
        if os.path.exists(cached_path):
            is_temp = False
            if self.fallback:
                # Fallback mode: try to refresh the cached copy first and
                # silently keep the existing one if the download fails.
                try:
                    _, is_temp = self.download(url, md5sum, cached_path)
                except ChecksumError:
                    raise
                except Exception:
                    # Deliberate best effort: the cached copy is the fallback.
                    pass

            if not check_md5sum(cached_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch for cached download '
                    'from %r at %r' % (url, cached_path))
            self.logger.debug('Using cache file %s' % cached_path)
        else:
            self.logger.debug('Cache miss; will cache %s as %s' %
                              (url, cached_path))
            _, is_temp = self.download(url, md5sum, cached_path)

        return cached_path, is_temp

    def download(self, url, md5sum=None, path=None):
        """Download a file from a URL to a given or temporary path.

        An online resource is always downloaded to a temporary file and moved
        to the specified path only after the download is complete and the
        checksum (if given) matches. If path is None, the temporary file is
        returned and the client code is responsible for cleaning it up.

        Local resources (``file`` URLs and plain paths) are not downloaded;
        they are checked and then linked or copied to ``path`` if one is given.

        Returns a ``(path, is_temp)`` tuple. Raises ChecksumError on an MD5
        mismatch and zc.buildout.UserError in offline mode or when the
        download fails with an IOError.

        """
        # Make sure the drive letter in windows-style file paths isn't
        # interpreted as a URL scheme.
        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url

        parsed_url = urlparse(url, 'file')
        url_scheme, _, url_path = parsed_url[:3]
        if url_scheme == 'file':
            self.logger.debug('Using local resource %s' % url)
            if not check_md5sum(url_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch for local resource at %r.' %
                    url_path)
            return locate_at(url_path, path), False

        if self.offline:
            raise zc.buildout.UserError(
                "Couldn't download %r in offline mode." % url)

        self.logger.info('Downloading %s' % url)
        handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
        os.close(handle)
        try:
            tmp_path, headers = urlretrieve(url, tmp_path)
            if not check_md5sum(tmp_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch downloading %r' % url)
        except IOError:
            e = sys.exc_info()[1]
            os.remove(tmp_path)
            raise zc.buildout.UserError("Error downloading extends for URL "
                              "%s: %s" % (url, e))
        except Exception:
            # Never leave a partial or mismatching temporary file behind.
            os.remove(tmp_path)
            raise

        if path:
            shutil.move(tmp_path, path)
            return path, False
        else:
            return tmp_path, True

    def filename(self, url):
        """Determine a file name from a URL according to the configuration.

        With ``hash_name`` set, the name is the MD5 hex digest of the URL.
        Otherwise it is the last non-empty component of the URL's path; if
        the path has no such component, ``'<host>:<port>'`` is used instead.

        """
        if self.hash_name:
            return md5(url.encode()).hexdigest()

        # Keep a windows drive letter from being parsed as a URL scheme.
        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url
        parsed = urlparse(url, 'file')
        url_path = parsed[2]

        if parsed[0] == 'file':
            while True:
                head, name = os.path.split(url_path)
                if name:
                    return name
                # Stop once splitting makes no more progress; a path made
                # only of separators (e.g. '/') splits to itself forever.
                if not head or head == url_path:
                    break
                url_path = head
        else:
            for name in reversed(url_path.split('/')):
                if name:
                    return name

        # No usable path component: name the file after host and port.
        # These are attributes of the parse result; the trailing two tuple
        # items are the query string and fragment, not host and port.
        try:
            url_port = parsed.port
        except ValueError:
            # Malformed port in the URL; naming the file must not fail.
            url_port = None
        return '%s:%s' % (parsed.hostname, url_port)
231 232 233 234 235 236 237 238 239 240 241


def check_md5sum(path, md5sum):
    """Report whether the file at path has the given MD5 hex digest.

    An md5sum of None means there is nothing to verify and counts as a
    match; the file is not opened in that case.

    """
    if md5sum is None:
        return True

    block_size = 2**16
    digest = md5()
    with open(path, 'rb') as stream:
        # Read in fixed-size blocks so the file is never loaded whole.
        for block in iter(lambda: stream.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest() == md5sum


def remove(path):
    """Delete the file at path; do nothing if the path does not exist."""
    if not os.path.exists(path):
        return
    os.remove(path)


def locate_at(source, dest):
    """Make source available at dest and return the resulting path.

    Returns source unchanged when dest is None or both resolve to the same
    real path. Otherwise a directory is copied as a tree, and a file is
    hard-linked to dest, falling back to a plain copy when linking is not
    available or fails. In those cases dest is returned.

    """
    if dest is None:
        return source
    if realpath(dest) == realpath(source):
        return source

    if os.path.isdir(source):
        shutil.copytree(source, dest)
        return dest

    try:
        os.link(source, dest)
    except (AttributeError, OSError):
        # No os.link on this platform, or the link could not be created.
        shutil.copyfile(source, dest)
    return dest