Commit c8f3a774 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'git-archive' into 'master'

"git archive" download support

This needs changes in GitLab and gitlab_git too.

See merge request !2
parents f58f3acb 0b34879b
# gitlab-git-http-server
gitlab-git-http-server was designed to unload Git HTTP traffic from
the GitLab Rails app (Unicorn) to a separate daemon. All authentication
and authorization logic is still handled by the GitLab Rails app.
the GitLab Rails app (Unicorn) to a separate daemon. It also serves
'git archive' downloads for GitLab. All authentication and
authorization logic is still handled by the GitLab Rails app.
Architecture: Git client -> NGINX -> gitlab-git-http-server (makes
auth request to GitLab Rails app) -> git-upload-pack
......@@ -10,7 +11,7 @@ auth request to GitLab Rails app) -> git-upload-pack
## Usage
```
gitlab-git-http-server [OPTIONS] REPO_ROOT
gitlab-git-http-server [OPTIONS]
Options:
-authBackend string
......@@ -27,11 +28,13 @@ Options:
Print version and exit
```
gitlab-git-http-server allows Git HTTP clients to push and pull to and from Git
repositories under REPO_ROOT. Each incoming request is first replayed (with an
empty request body) to an external authentication/authorization HTTP server:
the 'auth backend'. The auth backend is expected to be a GitLab Unicorn
process.
gitlab-git-http-server allows Git HTTP clients to push and pull to
and from Git repositories. Each incoming request is first replayed
(with an empty request body) to an external authentication/authorization
HTTP server: the 'auth backend'. The auth backend is expected to
be a GitLab Unicorn process. The 'auth response' is a JSON message
which tells gitlab-git-http-server the path of the Git repository
to read from/write to.
gitlab-git-http-server can listen on either a TCP or a Unix domain socket. It
can also open a second listening TCP listening socket with the Go
......@@ -63,14 +66,19 @@ You can try out the Git server without authentication as follows:
```
# Start a fake auth backend that allows everything/everybody
go run support/say-yes.go &
make test/data/test.git
go run support/fake-auth-backend.go ~+/test/data/test.git &
# Start gitlab-git-http-server
go build && ./gitlab-git-http-server /path/to/git-repos
make
./gitlab-git-http-server
```
Now if you have a Git repository in `/path/to/git-repos/my-repo.git`,
you can push to and pull from it at the URL
`http://localhost:8181/my-repo.git`.
Now you can try things like:
```
git clone http://localhost:8181/test.git
curl -JO http://localhost:8181/test/repository/archive.zip
```
## Example request flow
......
......@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
......@@ -18,23 +19,40 @@ import (
"path"
"strings"
"syscall"
"time"
)
type gitHandler struct {
httpClient *http.Client
repoRoot string
authBackend string
}
type gitService struct {
method string
suffix string
handleFunc func(gitEnv, string, string, http.ResponseWriter, *http.Request)
handleFunc func(w http.ResponseWriter, r *gitRequest, rpc string)
rpc string
}
type gitEnv struct {
// A gitReqest is an *http.Request decorated with attributes returned by the
// GitLab Rails application.
type gitRequest struct {
*http.Request
// GL_ID is an environment variable used by gitlab-shell hooks during 'git
// push' and 'git pull'
GL_ID string
// RepoPath is the full path on disk to the Git repository the request is
// about
RepoPath string
// ArchivePath is the full path where we should find/create a cached copy
// of a requested archive
ArchivePath string
// ArchivePrefix is used to put extracted archive contents in a
// subdirectory
ArchivePrefix string
// CommitId is used do prevent race conditions between the 'time of check'
// in the GitLab Rails app and the 'time of use' in gitlab-git-http-server.
CommitId string
}
// Routing table
......@@ -42,14 +60,18 @@ var gitServices = [...]gitService{
gitService{"GET", "/info/refs", handleGetInfoRefs, ""},
gitService{"POST", "/git-upload-pack", handlePostRPC, "git-upload-pack"},
gitService{"POST", "/git-receive-pack", handlePostRPC, "git-receive-pack"},
gitService{"GET", "/repository/archive", handleGetArchive, "tar.gz"},
gitService{"GET", "/repository/archive.zip", handleGetArchive, "zip"},
gitService{"GET", "/repository/archive.tar", handleGetArchive, "tar"},
gitService{"GET", "/repository/archive.tar.gz", handleGetArchive, "tar.gz"},
gitService{"GET", "/repository/archive.tar.bz2", handleGetArchive, "tar.bz2"},
}
func newGitHandler(repoRoot, authBackend string) *gitHandler {
return &gitHandler{&http.Client{}, repoRoot, authBackend}
func newGitHandler(authBackend string) *gitHandler {
return &gitHandler{&http.Client{}, authBackend}
}
func (h *gitHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var env gitEnv
var g gitService
log.Printf("%s %q", r.Method, r.URL)
......@@ -92,11 +114,11 @@ func (h *gitHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}
// The auth backend validated the client request and told us who
// the user is according to them (GL_ID). We must extract this
// information from the auth response body.
dec := json.NewDecoder(authResponse.Body)
if err := dec.Decode(&env); err != nil {
// The auth backend validated the client request and told us additional
// request metadata. We must extract this information from the auth
// response body.
gitReq := &gitRequest{Request: r}
if err := json.NewDecoder(authResponse.Body).Decode(gitReq); err != nil {
fail500(w, "decode JSON GL_ID", err)
return
}
......@@ -112,20 +134,12 @@ func (h *gitHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
}
// About path traversal: the Go net/http HTTP server, or
// rather ServeMux, makes the following promise: "ServeMux
// also takes care of sanitizing the URL request path, redirecting
// any request containing . or .. elements to an equivalent
// .- and ..-free URL.". In other words, we may assume that
// r.URL.Path does not contain '/../', so there is no possibility
// of path traversal here.
repoPath := path.Join(h.repoRoot, strings.TrimSuffix(r.URL.Path, g.suffix))
if !looksLikeRepo(repoPath) {
if !looksLikeRepo(gitReq.RepoPath) {
http.Error(w, "Not Found", 404)
return
}
g.handleFunc(env, g.rpc, repoPath, w, r)
g.handleFunc(w, gitReq, g.rpc)
}
func looksLikeRepo(p string) bool {
......@@ -159,7 +173,7 @@ func (h *gitHandler) doAuthRequest(r *http.Request) (result *http.Response, err
return h.httpClient.Do(authReq)
}
func handleGetInfoRefs(env gitEnv, _ string, path string, w http.ResponseWriter, r *http.Request) {
func handleGetInfoRefs(w http.ResponseWriter, r *gitRequest, _ string) {
rpc := r.URL.Query().Get("service")
if !(rpc == "git-upload-pack" || rpc == "git-receive-pack") {
// The 'dumb' Git HTTP protocol is not supported
......@@ -168,7 +182,7 @@ func handleGetInfoRefs(env gitEnv, _ string, path string, w http.ResponseWriter,
}
// Prepare our Git subprocess
cmd := gitCommand(env, "git", subCommand(rpc), "--stateless-rpc", "--advertise-refs", path)
cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", "--advertise-refs", r.RepoPath)
stdout, err := cmd.StdoutPipe()
if err != nil {
fail500(w, "handleGetInfoRefs", err)
......@@ -203,7 +217,136 @@ func handleGetInfoRefs(env gitEnv, _ string, path string, w http.ResponseWriter,
}
}
func handlePostRPC(env gitEnv, rpc string, path string, w http.ResponseWriter, r *http.Request) {
func handleGetArchive(w http.ResponseWriter, r *gitRequest, format string) {
archiveFilename := path.Base(r.ArchivePath)
if cachedArchive, err := os.Open(r.ArchivePath); err == nil {
defer cachedArchive.Close()
log.Printf("Serving cached file %q", r.ArchivePath)
setArchiveHeaders(w, format, archiveFilename)
// Even if somebody deleted the cachedArchive from disk since we opened
// the file, Unix file semantics guarantee we can still read from the
// open file in this process.
http.ServeContent(w, r.Request, "", time.Unix(0, 0), cachedArchive)
return
}
// We assume the tempFile has a unique name so that concurrent requests are
// safe. We create the tempfile in the same directory as the final cached
// archive we want to create so that we can use an atomic link(2) operation
// to finalize the cached archive.
tempFile, err := prepareArchiveTempfile(path.Dir(r.ArchivePath),
archiveFilename)
if err != nil {
fail500(w, "handleGetArchive create tempfile for archive", err)
}
defer tempFile.Close()
defer os.Remove(tempFile.Name())
compressCmd, archiveFormat := parseArchiveFormat(format)
archiveCmd := gitCommand("", "git", "--git-dir="+r.RepoPath, "archive", "--format="+archiveFormat, "--prefix="+r.ArchivePrefix+"/", r.CommitId)
archiveStdout, err := archiveCmd.StdoutPipe()
if err != nil {
fail500(w, "handleGetArchive", err)
return
}
defer archiveStdout.Close()
if err := archiveCmd.Start(); err != nil {
fail500(w, "handleGetArchive", err)
return
}
defer cleanUpProcessGroup(archiveCmd) // Ensure brute force subprocess clean-up
var stdout io.ReadCloser
if compressCmd == nil {
stdout = archiveStdout
} else {
compressCmd.Stdin = archiveStdout
stdout, err = compressCmd.StdoutPipe()
if err != nil {
fail500(w, "handleGetArchive compressCmd stdout pipe", err)
return
}
defer stdout.Close()
if err := compressCmd.Start(); err != nil {
fail500(w, "handleGetArchive start compressCmd process", err)
return
}
defer compressCmd.Wait()
archiveStdout.Close()
}
// Every Read() from stdout will be synchronously written to tempFile
// before it comes out the TeeReader.
archiveReader := io.TeeReader(stdout, tempFile)
// Start writing the response
setArchiveHeaders(w, format, archiveFilename)
w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return
if _, err := io.Copy(w, archiveReader); err != nil {
logContext("handleGetArchive read from subprocess", err)
return
}
if err := archiveCmd.Wait(); err != nil {
logContext("handleGetArchive wait for archiveCmd", err)
return
}
if compressCmd != nil {
if err := compressCmd.Wait(); err != nil {
logContext("handleGetArchive wait for compressCmd", err)
return
}
}
if err := finalizeCachedArchive(tempFile, r.ArchivePath); err != nil {
logContext("handleGetArchive finalize cached archive", err)
return
}
}
func setArchiveHeaders(w http.ResponseWriter, format string, archiveFilename string) {
w.Header().Add("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, archiveFilename))
if format == "zip" {
w.Header().Add("Content-Type", "application/zip")
} else {
w.Header().Add("Content-Type", "application/octet-stream")
}
w.Header().Add("Content-Transfer-Encoding", "binary")
w.Header().Add("Cache-Control", "private")
}
func parseArchiveFormat(format string) (*exec.Cmd, string) {
switch format {
case "tar":
return nil, "tar"
case "tar.gz":
return exec.Command("gzip", "-c", "-n"), "tar"
case "tar.bz2":
return exec.Command("bzip2", "-c"), "tar"
case "zip":
return nil, "zip"
}
return nil, "unknown"
}
func prepareArchiveTempfile(dir string, prefix string) (*os.File, error) {
if err := os.MkdirAll(dir, 0700); err != nil {
return nil, err
}
return ioutil.TempFile(dir, prefix)
}
func finalizeCachedArchive(tempFile *os.File, archivePath string) error {
if err := tempFile.Close(); err != nil {
return err
}
return os.Link(tempFile.Name(), archivePath)
}
func handlePostRPC(w http.ResponseWriter, r *gitRequest, rpc string) {
var body io.ReadCloser
var err error
......@@ -220,7 +363,7 @@ func handlePostRPC(env gitEnv, rpc string, path string, w http.ResponseWriter, r
defer body.Close()
// Prepare our Git subprocess
cmd := gitCommand(env, "git", subCommand(rpc), "--stateless-rpc", path)
cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", r.RepoPath)
stdout, err := cmd.StdoutPipe()
if err != nil {
fail500(w, "handlePostRPC", err)
......@@ -284,14 +427,14 @@ func subCommand(rpc string) string {
return strings.TrimPrefix(rpc, "git-")
}
func gitCommand(env gitEnv, name string, args ...string) *exec.Cmd {
func gitCommand(gl_id string, name string, args ...string) *exec.Cmd {
cmd := exec.Command(name, args...)
// Start the command in its own process group (nice for signalling)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
// Explicitly set the environment for the Git command
cmd.Env = []string{
fmt.Sprintf("PATH=%s", os.Getenv("PATH")),
fmt.Sprintf("GL_ID=%s", env.GL_ID),
fmt.Sprintf("GL_ID=%s", gl_id),
}
// If we don't do something with cmd.Stderr, Git errors will be lost
cmd.Stderr = os.Stderr
......
......@@ -36,22 +36,18 @@ func main() {
pprofListenAddr := flag.String("pprofListenAddr", "", "pprof listening address, e.g. 'localhost:6060'")
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, "\n %s [OPTIONS] REPO_ROOT\n\nOptions:\n", os.Args[0])
fmt.Fprintf(os.Stderr, "\n %s [OPTIONS]\n\nOptions:\n", os.Args[0])
flag.PrintDefaults()
}
flag.Parse()
version := fmt.Sprintf("gitlab-git-http-server %s", Version)
if *printVersion {
fmt.Printf("gitlab-git-http-server %s\n", Version)
fmt.Println(version)
os.Exit(0)
}
repoRoot := flag.Arg(0)
if repoRoot == "" {
flag.Usage()
os.Exit(1)
}
log.Printf("repoRoot: %s", repoRoot)
log.Printf("Starting %s", version)
// Good housekeeping for Unix sockets: unlink before binding
if *listenNetwork == "unix" {
......@@ -81,6 +77,6 @@ func main() {
// Because net/http/pprof installs itself in the DefaultServeMux
// we create a fresh one for the Git server.
serveMux := http.NewServeMux()
serveMux.Handle("/", newGitHandler(repoRoot, *authBackend))
serveMux.Handle("/", newGitHandler(*authBackend))
log.Fatal(http.Serve(listener, serveMux))
}
package main
import (
"bytes"
"fmt"
"io/ioutil"
"net"
"net/http"
"net/http/httptest"
......@@ -19,9 +21,11 @@ const servWaitSleep = 100 // milliseconds sleep interval
const scratchDir = "test/scratch"
const testRepoRoot = "test/data"
const testRepo = "test.git"
const testProject = "test"
var remote = fmt.Sprintf("http://%s/%s", servAddr, testRepo)
var checkoutDir = path.Join(scratchDir, "test")
var cacheDir = path.Join(scratchDir, "cache")
func TestAllowedClone(t *testing.T) {
// Prepare clone directory
......@@ -30,7 +34,7 @@ func TestAllowedClone(t *testing.T) {
}
// Prepare test server and backend
ts := testAuthServer(200, `{"GL_ID":"user-123"}`)
ts := testAuthServer(200, gitOkBody(t))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
......@@ -68,7 +72,7 @@ func TestAllowedPush(t *testing.T) {
preparePushRepo(t)
// Prepare the test server and backend
ts := testAuthServer(200, `{"GL_ID":"user-123"}`)
ts := testAuthServer(200, gitOkBody(t))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
......@@ -96,6 +100,154 @@ func TestDeniedPush(t *testing.T) {
}
}
func TestAllowedDownloadZip(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.zip"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/%s/repository/archive.zip", servAddr, testProject))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
extractCmd := exec.Command("unzip", archiveName)
extractCmd.Dir = scratchDir
runOrFail(t, extractCmd)
}
func TestAllowedDownloadTar(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.tar"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/%s/repository/archive.tar", servAddr, testProject))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
extractCmd := exec.Command("tar", "xf", archiveName)
extractCmd.Dir = scratchDir
runOrFail(t, extractCmd)
}
func TestAllowedDownloadTarGz(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.tar.gz"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/%s/repository/archive.tar.gz", servAddr, testProject))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
extractCmd := exec.Command("tar", "zxf", archiveName)
extractCmd.Dir = scratchDir
runOrFail(t, extractCmd)
}
func TestAllowedDownloadTarBz2(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.tar.bz2"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/%s/repository/archive.tar.bz2", servAddr, testProject))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
extractCmd := exec.Command("tar", "jxf", archiveName)
extractCmd.Dir = scratchDir
runOrFail(t, extractCmd)
}
func TestAllowedApiDownloadZip(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.zip"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/api/v3/projects/123/repository/archive.zip", servAddr))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
extractCmd := exec.Command("unzip", archiveName)
extractCmd.Dir = scratchDir
runOrFail(t, extractCmd)
}
func TestDownloadCacheHit(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.zip"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
if err := os.MkdirAll(cacheDir, 0755); err != nil {
t.Fatal(err)
}
cachedContent := []byte{'c', 'a', 'c', 'h', 'e', 'd'}
if err := ioutil.WriteFile(path.Join(cacheDir, archiveName), cachedContent, 0644); err != nil {
t.Fatal(err)
}
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/api/v3/projects/123/repository/archive.zip", servAddr))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
actual, err := ioutil.ReadFile(path.Join(scratchDir, archiveName))
if err != nil {
t.Fatal(err)
}
if bytes.Compare(actual, cachedContent) != 0 {
t.Fatal("Unexpected file contents in download")
}
}
func TestDownloadCacheCreate(t *testing.T) {
prepareDownloadDir(t)
// Prepare test server and backend
archiveName := "foobar.zip"
ts := testAuthServer(200, archiveOkBody(t, archiveName))
defer ts.Close()
defer cleanUpProcessGroup(startServerOrFail(t, ts))
downloadCmd := exec.Command("curl", "-J", "-O", fmt.Sprintf("http://%s/api/v3/projects/123/repository/archive.zip", servAddr))
downloadCmd.Dir = scratchDir
runOrFail(t, downloadCmd)
compareCmd := exec.Command("cmp", path.Join(cacheDir, archiveName), path.Join(scratchDir, archiveName))
if err := compareCmd.Run(); err != nil {
t.Fatalf("Comparison between downloaded file and cache item failed: %s", err)
}
}
func prepareDownloadDir(t *testing.T) {
if err := os.RemoveAll(scratchDir); err != nil {
t.Fatal(err)
}
if err := os.MkdirAll(scratchDir, 0755); err != nil {
t.Fatal(err)
}
}
func preparePushRepo(t *testing.T) {
if err := os.RemoveAll(scratchDir); err != nil {
t.Fatal(err)
......@@ -117,7 +269,7 @@ func testAuthServer(code int, body string) *httptest.Server {
}
func startServerOrFail(t *testing.T, ts *httptest.Server) *exec.Cmd {
cmd := exec.Command("go", "run", "main.go", "githandler.go", fmt.Sprintf("-authBackend=%s", ts.URL), fmt.Sprintf("-listenAddr=%s", servAddr), testRepoRoot)
cmd := exec.Command("go", "run", "main.go", "githandler.go", fmt.Sprintf("-authBackend=%s", ts.URL), fmt.Sprintf("-listenAddr=%s", servAddr))
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
......@@ -154,3 +306,30 @@ func runOrFail(t *testing.T, cmd *exec.Cmd) {
t.Fatal(err)
}
}
func gitOkBody(t *testing.T) string {
return fmt.Sprintf(`{"GL_ID":"user-123","RepoPath":"%s"}`, repoPath(t))
}
func archiveOkBody(t *testing.T, archiveName string) string {
cwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
archivePath := path.Join(cwd, cacheDir, archiveName)
jsonString := `{
"RepoPath":"%s",
"ArchivePath":"%s",
"CommitId":"c7fbe50c7c7419d9701eebe64b1fdacc3df5b9dd",
"ArchivePrefix":"foobar123"
}`
return fmt.Sprintf(jsonString, repoPath(t), archivePath)
}
func repoPath(t *testing.T) string {
cwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
return path.Join(cwd, testRepoRoot, testRepo)
}
......@@ -4,11 +4,17 @@ import (
"fmt"
"log"
"net/http"
"os"
)
func main() {
if len(os.Args) == 1 {
fmt.Fprintf(os.Stderr, "Usage: %s /path/to/test-repo.git\n", os.Args[0])
os.Exit(1)
}
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"GL_ID":""}`)
fmt.Fprintf(w, `{"RepoPath":"%s","ArchivePath":"%s"}`, os.Args[1], r.URL.Path)
})
log.Fatal(http.ListenAndServe("localhost:8080", nil))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment