Commit e3a552eb authored by Alain Takoudjou's avatar Alain Takoudjou Committed by Xavier Thompson

[feat] allow to rewrite url before download using netrc and macdef

This adds a generic mechanism in the Download API to rewrite the URL
to be downloaded, possibly with extra headers. Substituted groups from
the matching regular expression can optionally be URL-quoted.

.netrc:

macdef buildout:HOSTNAME
  RULE_1_REGEX
    RULE_1_NEW_URL HEADER1=VALUE1 HEADER2=VALUE2 ...
  RULE_2_REGEX
    RULE_2_NEW_URL ...
  ...

macdef buildout:OTHER_HOSTNAME
  ...

A rewriting rule is defined by a pair of lines with optional indentation
(only there for readability).

The first line of a rule is a regex that matches fully against
the path?query part of the URL.

If the second line is empty, the request isn't changed.
Otherwise, its parts are parsed using shell-like syntax:
- the first one must produce the full URL to download
- the next ones are optional headers to send
- each part is subject to substitution using Python's
  format string syntax (str.format):
  groups captured by the regex on the first line are numbered starting from 1,
  {0} is the base URL (scheme://location) and
  the optional `quote` attribute returns the URL-encoded value

A use case is to work around
  https://gitlab.com/gitlab-org/gitlab/-/issues/19189
for example with the following .netrc:

    macdef buildout:lab.nexedi.com
      /(.+)/-/raw/([^/]+)/(.+)
        {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>
parent f1ae9625
...@@ -22,12 +22,11 @@ try: ...@@ -22,12 +22,11 @@ try:
# Python 3 # Python 3
from urllib.error import HTTPError from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlsplit, quote
except ImportError: except ImportError:
# Python 2 # Python 2
from urlparse import urlparse from urlparse import urlparse, urlsplit
from urlparse import urlunparse from urllib2 import HTTPError, Request, urlopen, quote
from urllib2 import HTTPError, Request, urlopen
from zc.buildout.easy_install import realpath from zc.buildout.easy_install import realpath
from base64 import b64encode from base64 import b64encode
...@@ -38,6 +37,7 @@ import netrc ...@@ -38,6 +37,7 @@ import netrc
import os import os
import os.path import os.path
import re import re
import shlex
import shutil import shutil
import tempfile import tempfile
import zc.buildout import zc.buildout
...@@ -60,6 +60,12 @@ class netrc(netrc.netrc): ...@@ -60,6 +60,12 @@ class netrc(netrc.netrc):
self.__init__(os.devnull) self.__init__(os.devnull)
return self.authenticators(host) return self.authenticators(host)
class Group(str):
    """A captured regex group that can also render itself URL-encoded.

    Behaves exactly like ``str``; the extra ``quote`` attribute is what
    makes ``{1.quote}`` usable inside ``str.format`` templates.
    """

    @property
    def quote(self):
        # An empty ``safe`` set means that even '/' is percent-encoded.
        return quote(self, safe='')
netrc = netrc() netrc = netrc()
class ChecksumError(zc.buildout.UserError): class ChecksumError(zc.buildout.UserError):
...@@ -273,25 +279,50 @@ class Download(object): ...@@ -273,25 +279,50 @@ class Download(object):
url_host, url_port = parsed[-2:] url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port) return '%s:%s' % (url_host, url_port)
def _rewrite_url(self, base_url, path_query, line_list):
    """Apply the first matching rewrite rule to a URL.

    base_url   -- value substituted for ``{0}`` in the templates.
    path_query -- string the rule patterns are matched against.
    line_list  -- lines of the selected macdef, read as consecutive
                  (pattern, template) pairs; a trailing unpaired line
                  is ignored.

    Returns the list of formatted parts of the template belonging to the
    first rule whose pattern matches ``path_query`` entirely (an empty
    list when that template is blank), or None when no rule matches.
    """
    # re.fullmatch is not available on Python 2; fall back to an anchored
    # non-capturing group there.  Merely appending '$' is not a full
    # match: in 'a|b' the '$' would only bind to the last alternative,
    # and '$' also matches just before a trailing newline.
    fullmatch = getattr(re, 'fullmatch', None)
    for pattern, template in zip(*[iter(line_list)] * 2):
        pattern = pattern.strip()
        if fullmatch is not None:
            match = fullmatch(pattern, path_query)
        else:
            match = re.match(r'(?:%s)\Z' % pattern, path_query)
        if match is not None:
            group_list = [base_url]
            # Optional groups that did not take part in the match are
            # substituted as '' rather than the literal text 'None'.
            group_list.extend(Group(g) for g in match.groups(''))
            return [part.format(*group_list)
                    for part in shlex.split(template)]
    return None
def _auth(self, url): def _auth(self, url):
parsed_url = urlparse(url) parsed_url = urlsplit(url)
if parsed_url.scheme in ('http', 'https'): if parsed_url.scheme in ('http', 'https'):
auth_host = parsed_url.netloc.rsplit('@', 1) auth_host = parsed_url.netloc.rsplit('@', 1)
if len(auth_host) > 1: if len(auth_host) > 1:
return (auth_host[0], return (auth_host[0],
parsed_url._replace(netloc=auth_host[1]).geturl()) None,
parsed_url._replace(netloc=auth_host[1]).geturl(),
(),
)
auth = netrc.authenticators(parsed_url.hostname) auth = netrc.authenticators(parsed_url.hostname)
if auth: if auth is None:
return '{0}:{2}'.format(*auth), url return
new = self._rewrite_url(
parsed_url._replace(path='', query='', fragment='').geturl(),
parsed_url._replace(scheme='', netloc='', fragment='').geturl(),
netrc.macros.get('buildout:' + parsed_url.hostname, ()),
) or [url]
return auth[0], auth[2], new.pop(0), new
def urlretrieve(self, url, tmp_path): def urlretrieve(self, url, tmp_path):
auth = self._auth(url) auth = self._auth(url)
if auth: if auth:
req = Request(auth[1]) req = Request(auth[2])
if url != auth[2]:
self.logger.info('Downloading from url: %s', auth[2])
for header in auth[3]:
req.add_header(*header.split('=', 1))
cred = auth[0] if auth[1] is None else ':'.join(auth[:2])
req.add_header("Authorization", req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth[0])))) "Basic " + bytes2str(b64encode(str2bytes(cred))))
else: else:
req = url req = url
with closing(urlopen(req)) as src: with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst: with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst) shutil.copyfileobj(src, dst)
......
...@@ -18,10 +18,12 @@ try: ...@@ -18,10 +18,12 @@ try:
# Python 3 # Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen from urllib.request import urlopen
from urllib.parse import urlparse, unquote, parse_qsl
except ImportError: except ImportError:
# Python 2 # Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen from urllib2 import urlopen, unquote
from urlparse import urlparse, parse_qsl
import base64 import base64
import errno import errno
...@@ -410,6 +412,14 @@ class Handler(BaseHTTPRequestHandler): ...@@ -410,6 +412,14 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(out) self.wfile.write(out)
def forbidden():
    """Answer the current request with a 403 status and a small HTML body."""
    body = b'<html><body>Forbidden</body></html>'
    self.send_response(403, 'Forbidden')
    self.send_header('Content-Length', str(len(body)))
    self.send_header('Content-Type', 'text/html')
    self.end_headers()
    self.wfile.write(body)
if self.path == '/enable_server_logging': if self.path == '/enable_server_logging':
self.__server.__log = True self.__server.__log = True
return k() return k()
...@@ -419,21 +429,32 @@ class Handler(BaseHTTPRequestHandler): ...@@ -419,21 +429,32 @@ class Handler(BaseHTTPRequestHandler):
return k() return k()
if self.path.startswith('/private/'): if self.path.startswith('/private/'):
parsed = urlparse(self.path)
auth = self.headers.get('Authorization') auth = self.headers.get('Authorization')
if auth and auth.startswith('Basic ') and \ if auth and auth.startswith('Basic ') and \
self.path[9:].encode() == base64.b64decode( parsed.path[9:].encode() == base64.b64decode(
self.headers.get('Authorization')[6:]): self.headers.get('Authorization')[6:]):
if parsed.query:
for h, v in parse_qsl(parsed.query):
if self.headers.get(h) != v:
return forbidden()
return k() return k()
# But not returning 401+WWW-Authenticate, we check that the client # But not returning 401+WWW-Authenticate, we check that the client
# skips auth challenge, which is not free (in terms of performance) # skips auth challenge, which is not free (in terms of performance)
# and useless for what we support. # and useless for what we support.
self.send_response(403, 'Forbidden') return forbidden()
out = '<html><body>Forbidden</body></html>'.encode() if self.path.startswith('/namespace/project/-/raw/master/'):
self.send_header('Content-Length', str(len(out))) # This path is private and need /api
self.send_header('Content-Type', 'text/html') return forbidden()
self.end_headers() if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
self.wfile.write(out) # path is: .../files/FILE_PATH_XXXX/raw/?ref=master
return u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if password == token and username == "private_token":
return k()
return forbidden()
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/'))) path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not ( if not (
......
...@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden ...@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> is_temp; remove(path) >>> is_temp; remove(path)
True True
>>> os.environ['HOME'] = old_home >>> os.environ['HOME'] = old_home
>>> remove(netrc)
Gitlab private token file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> c = "machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(.+)/foo/([^/]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2} \n\t/(.+)/-/raw/([^/]+)/(.+) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=TOKENXXX\n\n"
>>> write(netrc, c)
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with netrc and no macdef:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
A netrc macdef can rewrite any URL:
>>> remove(netrc)
>>> c = 'machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2"\n\n'
>>> write(netrc, c)
>>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2'
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(url)
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Downloading using the download cache Downloading using the download cache
------------------------------------ ------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment