Commit 6d611970 authored by Alain Takoudjou's avatar Alain Takoudjou

[feat] Try to download gitlab private raw files from gitlab API

If downloading the raw file fails, check whether it can be downloaded through
the API instead. This is possible if a username and password are provided in the URL;
the username should be PRIVATE-TOKEN (this is the username used for a private token).
parent fb45b3e5
......@@ -50,6 +50,7 @@ setup(
'setuptools>=38.2.3',
'pip',
'wheel',
'lxml',
],
include_package_data = True,
entry_points = entry_points,
......
......@@ -22,12 +22,13 @@ try:
# Python 3
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlunparse, quote, urlencode
except ImportError:
# Python 2
from urlparse import urlparse
from urlparse import urlunparse
from urllib2 import HTTPError, Request, urlopen
from urllib2 import HTTPError, Request, urlopen, quote
from urllib import urlencode
from zc.buildout.easy_install import realpath
from base64 import b64encode
......@@ -43,7 +44,7 @@ import tempfile
import zc.buildout
from . import bytes2str, str2bytes
from .rmtree import rmtree
from lxml.html import parse as lxmlparse
class netrc(netrc.netrc):
......@@ -65,6 +66,9 @@ netrc = netrc()
class ChecksumError(zc.buildout.UserError):
    """User-facing error raised when a downloaded file fails its MD5 check."""
class GitlabAccessDeniedError(zc.buildout.UserError):
    """User-facing error raised when GitLab answers with its "Sign in" page."""
class Download(object):
"""Configurable download utility.
......@@ -239,6 +243,13 @@ class Download(object):
self.logger.info('using alternate URL: %s', alternate_url)
download_url = alternate_url
self.urlretrieve(alternate_url, path)
except GitlabAccessDeniedError:
header_dict, laburl = self._labraw_authproxy(url)
if len(header_dict.keys()) > 0:
# gitlab url, try from API
self.urlretrieve(laburl, path, headers=header_dict)
else:
raise
if not check_md5sum(path, md5sum):
raise ChecksumError('MD5 checksum mismatch downloading %r'
% download_url)
......@@ -284,15 +295,59 @@ class Download(object):
if auth:
return '{0}:{2}'.format(*auth), url
def urlretrieve(self, url, tmp_path):
def _labraw_authproxy(self, url): # -> url'
header_dict = {}
# url should be https://XXX.YYY/namespace/project/[-/]raw/....
if not re.match(r"https://[\w\-_\.\:\@\+]+/([\.\w\-\+_]+/[\.\w\-\+_]+/(-/){0,1}raw/)", url):
return header_dict, url
p = urlparse(url)
pathv = p.path.split('/')
if p.username == "PRIVATE-TOKEN" and p.password:
header_dict["PRIVATE-TOKEN"] = p.password
repo = '/'.join(pathv[1:3])
# FIXME this does not support refs like y/bstr.
# To support this we will need to do what
# https://lab.nexedi.com/nexedi/gitlab-workhorse/commit/5b8cf10e
# was doing - try to extract all variants for ref from longest to
# shortest and stop on the first variant thay yields good result.
if pathv[3] == '-': # the url is like .../-/raw/...
ref = pathv[5]
filepath = '/'.join(pathv[6:])
else:
ref = pathv[4]
filepath = '/'.join(pathv[5:])
qrepo = quote(repo, '')
qfilepath = quote(filepath, '')
path = '/api/v4/projects/%s/repository/files/%s/raw' % (qrepo, qfilepath)
query = urlencode({'ref': ref})
netloc = '%s:%s' % (p.hostname, p.port) if p.port else p.hostname
return header_dict, urlunparse((p.scheme, netloc, path, p.params, query, p.fragment))
def urlretrieve(self, url, tmp_path, headers=None):
    """Download ``url`` into the file ``tmp_path``.

    ``headers`` is an optional mapping of extra HTTP request headers.
    When ``self._auth(url)`` yields credentials, the request is sent to the
    URL it returns, with a Basic ``Authorization`` header.

    Returns ``(tmp_path, info)`` where ``info`` is the response's
    ``info()`` object.

    Raises ``GitlabAccessDeniedError`` when ``url`` looks like a GitLab raw
    URL and the response is GitLab's "Sign in" page; nothing is written to
    ``tmp_path`` in that case.
    """
    # Local import: ``io`` is only needed to wrap the sniffed prefix.
    import io

    auth = self._auth(url)
    if auth:
        req = Request(auth[1])
        req.add_header("Authorization",
                       "Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
    else:
        req = Request(url)
    # ``headers`` defaults to None rather than a shared mutable dict.
    for name, value in (headers or {}).items():
        req.add_header(name, value)

    with closing(urlopen(req)) as src:
        # Only a bounded prefix of the body is inspected, so the response
        # stream is not exhausted before it gets copied to disk.
        head = b''
        # Is this a gitlab raw URL ?
        # Gitlab returns its sign in page with code 200 if authentication
        # failed.
        if re.match(r"https://[\w\-_\.\:\@\+]+/([\.\w\-\+_]+/[\.\w\-\+_]+/(-/){0,1}raw/)", url):
            head = src.read(65536)
            # An empty body has nothing to parse; a parse may also yield
            # no root element at all.
            root = lxmlparse(io.BytesIO(head)).getroot() if head else None
            page_title = root.find(".//title") if root is not None else None
            # ``text`` is None for an empty <title/> element.
            if page_title is not None and (page_title.text or "").startswith("Sign in"):
                # the content is gitlab Sign in page
                raise GitlabAccessDeniedError("You have been redirected to Sign in page")
        with open(tmp_path, 'wb') as dst:
            # Write back what was consumed while sniffing, then the rest.
            dst.write(head)
            shutil.copyfileobj(src, dst)
        return tmp_path, src.info()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment