diff --git a/internal/api/auth.go b/internal/api/auth.go
new file mode 100644
index 0000000000000000000000000000000000000000..f8c43b750132bbde6d17ad254e8c1bc29013acd3
--- /dev/null
+++ b/internal/api/auth.go
@@ -0,0 +1,87 @@
+// API calls related to authentication
+package api
+
+import (
+	"../badgateway"
+	"../helper"
+	proxypkg "../proxy"
+	"../senddata"
+	"../sendfile"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+)
+
+// AuthReply is the reply from auth backend, e.g. for "download from repo" authorization request.
+type AuthReply struct {
+	// raw reply from auth backend & PreAuthorizeHandler().
+	// recorded so we can replay it to client in full
+	// if e.g. access is rejected.
+	RawReply *httptest.ResponseRecorder
+
+	// decoded auth reply
+	Response
+}
+
+// testDownloadOkViaSendArchive detects whether archive download is ok via senddata mechanism.
+type testDownloadOkViaSendArchive struct {
+	senddata.Prefix
+	authReply *AuthReply
+}
+
+// Inject unpacks sendData and records its RepoPath in aok.authReply.
+// If sendData cannot be unpacked the error is reported via helper.Fail500.
+func (aok *testDownloadOkViaSendArchive) Inject(w http.ResponseWriter, r *http.Request, sendData string) {
+	var param struct{ RepoPath string }
+	if err := aok.Unpack(&param, sendData); err != nil {
+		helper.Fail500(w, fmt.Errorf("testDownloadOkViaSendArchive: unpack sendData: %v", err))
+		return
+	}
+
+	// if we ever get to this point - auth handler approved
+	// access and thus it is ok to download
+	aok.authReply.RepoPath = param.RepoPath
+}
+
+// VerifyDownloadAccess asks auth backend whether download is ok for a project.
+// Authorization is approved if AuthReply.RepoPath != "" on return.
+// Raw auth backend response is recorded in AuthReply.RawReply.
+func (a *API) VerifyDownloadAccess(project, query string, header http.Header) AuthReply {
+	authReply := AuthReply{
+		RawReply: httptest.NewRecorder(),
+	}
+
+	// Request to auth backend to verify whether download is possible.
+	// - first option is via asking as `git fetch` would do, but on Rails
+	//   side this supports only basic auth, not private token.
+	// - that's why we ask auth backend to authenticate as if it was request to
+	//   get repo archive and propagate request query and header.
+	// url := project + ".git/info/refs?service=git-upload-pack"
+	url := project + "/repository/archive.zip"
+	if query != "" {
+		url += "?" + query
+	}
+	reqDownloadAccess, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		helper.Fail500(authReply.RawReply, fmt.Errorf("GET repository/archive.zip: %v", err))
+		return authReply
+	}
+	for k, v := range header {
+		reqDownloadAccess.Header[k] = v
+	}
+
+	// Send request to auth backend and detect via aok senddata hook
+	// whether access is permitted.
+	aok := &testDownloadOkViaSendArchive{"git-archive:", &authReply}
+	authProxy := senddata.SendData(
+		sendfile.SendFile(proxypkg.NewProxy(
+			a.URL,
+			a.Version,
+			a.Client.Transport.(*badgateway.RoundTripper),
+		)),
+		aok,
+	)
+	authProxy.ServeHTTP(authReply.RawReply, reqDownloadAccess)
+
+	return authReply
+}
diff --git a/internal/git/xblob.go b/internal/git/xblob.go
new file mode 100644
index 0000000000000000000000000000000000000000..9610edfcd07faacb435f0e1720df07e40bbe22a2
--- /dev/null
+++ b/internal/git/xblob.go
@@ -0,0 +1,172 @@
+// Handler for raw blob downloads
+//
+// Blobs are read via `git cat-file ...` with first querying authentication
+// backend about download-access permission for containing repository.
+
+package git
+
+import (
+	"../api"
+	"../helper"
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"regexp"
+	"strings"
+)
+
+// GetBlobRaw returns the HTTP handler for `.../raw/<ref>/path`.
+func GetBlobRaw(a *api.API) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		handleGetBlobRaw(a, w, r)
+	})
+}
+
+var rawRe = regexp.MustCompile(`/raw/`)
+
+// handleGetBlobRaw serves one raw blob request: auth check first, then blob content.
+func handleGetBlobRaw(a *api.API, w http.ResponseWriter, r *http.Request) {
+	// Extract project & refpath
+	// <project>/raw/branch/file -> <project>, branch/file
+	u := r.URL
+	rawLoc := rawRe.FindStringIndex(u.Path)
+	if rawLoc == nil {
+		helper.Fail500(w, errors.New("extract project name"))
+		return
+	}
+	project := u.Path[:rawLoc[0]]
+	refpath := u.Path[rawLoc[1]:]
+
+	// Query download access auth for this project
+	authReply := a.VerifyDownloadAccess(project, u.RawQuery, r.Header)
+	if authReply.RepoPath == "" {
+		// access denied - copy auth reply to client in full -
+		// its HTTP code, headers and body carry the details
+		// about why access was denied.
+		for k, v := range authReply.RawReply.HeaderMap {
+			w.Header()[k] = v
+		}
+		w.WriteHeader(authReply.RawReply.Code)
+		if _, err := io.Copy(w, authReply.RawReply.Body); err != nil {
+			helper.LogError(fmt.Errorf("writing authReply.RawReply.Body: %v", err))
+		}
+		return
+	}
+
+	// Access granted - we can emit the blob
+	emitBlob(w, authReply.RepoPath, refpath)
+}
+
+// emitBlob writes content of blob located at <ref>/path (jointly denoted as 'refpath') to w.
+func emitBlob(w http.ResponseWriter, repopath string, refpath string) {
+	// `git cat-file --batch` takes one object name per line, so a refpath
+	// containing LF cannot be queried and would desynchronize the exchange below.
+	if strings.Contains(refpath, "\n") {
+		http.Error(w, fmt.Sprintf("Blob for %q not found", refpath), http.StatusNotFound)
+		return
+	}
+
+	// Communicate with `git cat-file --batch` trying refs from longest
+	// to shortest prefix in refpath. This way we find longest-match for
+	// ref and get blob sha1 and content in the end.
+	queryCmd := gitCommand("", "", "git", "--git-dir="+repopath, "cat-file", "--batch")
+	queryStdin, err := queryCmd.StdinPipe()
+	if err != nil {
+		helper.Fail500(w, fmt.Errorf("git cat-file --batch; stdin: %v", err))
+		return
+	}
+	defer queryStdin.Close()
+	queryStdout, err := queryCmd.StdoutPipe()
+	if err != nil {
+		helper.Fail500(w, fmt.Errorf("git cat-file --batch; stdout: %v", err))
+		return
+	}
+	defer queryStdout.Close()
+	queryReader := bufio.NewReader(queryStdout)
+
+	if err := queryCmd.Start(); err != nil {
+		helper.Fail500(w, fmt.Errorf("git cat-file --batch; start: %v", err))
+		return
+	}
+	defer helper.CleanUpProcessGroup(queryCmd)
+
+	// refpath components as vector
+	refpathv := strings.Split(refpath, "/")
+
+	// scan from right to left and try to change '/' -> ':' and see if it
+	// creates a correct git object name. If it does - we read object
+	// content which follows.
+	var sha1, type_ string
+	var size int64
+	for i := len(refpathv); i > 0; i-- {
+		ref := strings.Join(refpathv[:i], "/")
+		path := strings.Join(refpathv[i:], "/")
+		if _, err := fmt.Fprintf(queryStdin, "%s:%s\n", ref, path); err != nil {
+			helper.Fail500(w, fmt.Errorf("git cat-file --batch; write: %v", err))
+			return
+		}
+
+		reply, err := queryReader.ReadString('\n')
+		if err != nil {
+			helper.Fail500(w, fmt.Errorf("git cat-file --batch; read: %v", err))
+			return
+		}
+
+		// <object> SP missing LF
+		if strings.HasSuffix(reply, " missing\n") {
+			continue
+		}
+
+		// <sha1> SP <type> SP <size> LF
+		if _, err := fmt.Sscanf(reply, "%s %s %d\n", &sha1, &type_, &size); err != nil {
+			helper.Fail500(w, fmt.Errorf("git cat-file --batch; reply parse: %v", err))
+			return
+		}
+
+		if type_ != "blob" {
+			log.Printf("git cat-file --batch; %v is not blob (is %v)", sha1, type_)
+			sha1 = "" // so it will return 404
+		}
+
+		// git object found
+		break
+	}
+
+	// Blob not found -> 404
+	if sha1 == "" {
+		http.Error(w, fmt.Sprintf("Blob for %v not found", refpath), http.StatusNotFound)
+		return
+	}
+
+	// Blob found - start writing response
+	w.Header().Set("Content-Disposition", "inline")
+	w.Header().Set("Content-Transfer-Encoding", "binary")
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", size))
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	// net/http sniffs stream and automatically detects and sets
+	// Content-Type header. We do not have to do it ourselves.
+
+	w.Header().Set("Cache-Control", "private") // Rails sets this for IE compatibility
+	w.Header().Set("ETag", fmt.Sprintf(`"%s"`, sha1))
+
+	w.WriteHeader(http.StatusOK) // Don't bother with HTTP 500 from this point on, just return
+	// XXX better use queryStdout instead of queryReader, but we could be
+	// holding some tail bytes in queryReader after chat phase
+	if _, err := io.CopyN(w, queryReader, size); err != nil {
+		helper.LogError(fmt.Errorf("io.CopyN: %v", err))
+		return
+	}
+
+	// close git stdin explicitly, so it can exit cleanly
+	if err := queryStdin.Close(); err != nil {
+		helper.LogError(fmt.Errorf("queryStdin.Close: %v", err))
+		return
+	}
+
+	if err := queryCmd.Wait(); err != nil {
+		helper.LogError(fmt.Errorf("wait: %v", err))
+	}
+}
diff --git a/internal/upstream/routes.go b/internal/upstream/routes.go
index 453324d4d4d4aea7e778c2e3516cc450d541fc67..39eaec8fd1ca57b74d98eccc5b5c4bba0861fbf8 100644
--- a/internal/upstream/routes.go
+++ b/internal/upstream/routes.go
@@ -182,6 +182,9 @@ func (u *upstream) configureRoutes() {
 		route("POST", gitProjectPattern+`git-receive-pack\z`, contentEncodingHandler(git.ReceivePack(api)), withMatcher(isContentType("application/x-git-receive-pack-request"))),
 		route("PUT", gitProjectPattern+`gitlab-lfs/objects/([0-9a-f]{64})/([0-9]+)\z`, lfs.PutStore(api, signingProxy), withMatcher(isContentType("application/octet-stream"))),
 
+		// Raw blobs
+		route("GET", projectPattern+`raw/`, git.GetBlobRaw(api)),
+
 		// CI Artifacts
 		route("POST", apiPattern+`v4/jobs/[0-9]+/artifacts\z`, contentEncodingHandler(artifacts.UploadArtifacts(api, proxy))),
 		route("POST", ciAPIPattern+`v1/builds/[0-9]+/artifacts\z`, contentEncodingHandler(artifacts.UploadArtifacts(api, proxy))),
diff --git a/main_test.go b/main_test.go
index c1a7e5b7af23a877b10636f618f72f97b52eeac2..0b4c10edf3362e182aa206520df5f39b2ff310aa 100644
--- a/main_test.go
+++ b/main_test.go
@@ -3,12 +3,15 @@ package main
 import (
 	"bytes"
 	"compress/gzip"
+	"crypto/sha1"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
+	"net/http/httputil"
+	"net/url"
 	"os"
 	"os/exec"
 	"path"
@@ -593,4 +596,154 @@ func httpPost(t *testing.T, url string, headers map[string]string, reqBody []byt
 func assertNginxResponseBuffering(t *testing.T, expected string, resp *http.Response, msgAndArgs ...interface{}) {
 	actual := resp.Header.Get(helper.NginxResponseBufferHeader)
 	assert.Equal(t, expected, actual, msgAndArgs...)
+}
+
+// sha1(data) as human-readable string
+func sha1s(data []byte) string {
+	return fmt.Sprintf("%x", sha1.Sum(data))
+}
+
+// download an URL
+func download(t *testing.T, url string, h http.Header) (*http.Response, []byte) {
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// copy header to request
+	for k, v := range h {
+		req.Header[k] = v
+	}
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return resp, body
+}
+
+// Context for downloading & verifying paths under URL prefix
+type DownloadContext struct {
+	t         *testing.T
+	urlPrefix string
+	Header    http.Header
+}
+
+func NewDownloadContext(t *testing.T, urlPrefix string) *DownloadContext {
+	h := make(http.Header)
+	return &DownloadContext{t, urlPrefix, h}
+}
+
+func (dl DownloadContext) download(path string) (*http.Response, []byte) {
+	return download(dl.t, dl.urlPrefix+path, dl.Header)
+}
+
+// download `path` and expect content sha1 to be `expectSha1`
+func (dl DownloadContext) ExpectSha1(path, expectSha1 string) {
+	resp, out := dl.download(path)
+	if resp.StatusCode != 200 {
+		dl.t.Fatalf("Unexpected status code (expected 200, got %v)", resp.StatusCode)
+	}
+	outSha1 := sha1s(out)
+	if outSha1 != expectSha1 {
+		dl.t.Fatalf("Unexpected content in blob download (expected sha1 %v, got %v)", expectSha1, outSha1)
+	}
+}
+
+// download `path` and expect content data to be `expect`
+func (dl DownloadContext) Expect(path, expect string) {
+	dl.ExpectSha1(path, sha1s([]byte(expect)))
+}
+
+// download `path` and expect HTTP status code to be `code`
+func (dl DownloadContext) ExpectCode(path string, code int) {
+	resp, _ := dl.download(path)
+	if resp.StatusCode != code {
+		dl.t.Fatalf("Unexpected status code (expected %v, got %v)", code, resp.StatusCode)
+	}
+}
+
+func TestBlobDownload(t *testing.T) {
+	// Prepare test server and "all-ok" auth backend
+	ts := archiveOKServer(t, "")
+	defer ts.Close()
+	ws := startWorkhorseServer(ts.URL)
+	defer ws.Close()
+	dl := NewDownloadContext(t, fmt.Sprintf("%s/%s/raw", ws.URL, testProject))
+
+	dl.Expect("/5f923865/README.md", "testme\n======\n\nSample repo for testing gitlab features\n")
+	dl.ExpectSha1("/5f923865/README.md", "5f7af35c185a9e5face2f4afb6d7c4f00328d04c")
+	dl.ExpectSha1("/5f923865/files/ruby/popen.rb", "68990cc20fa74383358797a27967fa2b45d7d8f6")
+	dl.ExpectSha1("/874797c3/files/ruby/popen.rb", "4c266708f2bfd7ca3fed3f7ec74253f92ff3fe73")
+	dl.ExpectCode("/master/non-existing-file", 404)
+}
+
+func TestDeniedBlobDownload(t *testing.T) {
+	// Prepare test server and "all-deny" auth backend
+	ts := testAuthServer(nil, 403, "Access denied")
+	defer ts.Close()
+	ws := startWorkhorseServer(ts.URL)
+	defer ws.Close()
+	dl := NewDownloadContext(t, fmt.Sprintf("%s/%s/raw", ws.URL, testProject))
+
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.ExpectCode("/5f923865/files/ruby/popen.rb", 403)
+	dl.ExpectCode("/874797c3/files/ruby/popen.rb", 403)
+	dl.ExpectCode("/master/non-existing-file", 403)
+}
+
+func TestPrivateBlobDownload(t *testing.T) {
+	// Authorized .../repository/archive.zip requests get the archiveOKServer reply.
+	// Set up here: t.Fatal must only be called from the goroutine running the test.
+	aok := archiveOKServer(t, "")
+	defer aok.Close()
+	aokurl, err := url.Parse(aok.URL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	proxy := httputil.NewSingleHostReverseProxy(aokurl)
+
+	// Auth backend: access is ok if a token is provided via query, header or session cookie
+	ts := testhelper.TestServerWithHandler(nil, func(w http.ResponseWriter, r *http.Request) {
+		log.Println("UPSTREAM", r.Method, r.URL)
+		token_ok1 := r.URL.Query().Get("aaa_token") == "TOKEN-4AAA"
+		token_ok2 := r.Header.Get("BBB-TOKEN") == "TOKEN-4BBB"
+		cookie, _ := r.Cookie("_gitlab_session")
+		cookie_ok3 := (cookie != nil && cookie.Value == "COOKIE-CCC")
+		if !(token_ok1 || token_ok2 || cookie_ok3) {
+			w.WriteHeader(403)
+			fmt.Fprintf(w, "Access denied")
+			return
+		}
+		proxy.ServeHTTP(w, r)
+	})
+	defer ts.Close()
+	ws := startWorkhorseServer(ts.URL)
+	defer ws.Close()
+	dl := NewDownloadContext(t, fmt.Sprintf("%s/%s/raw", ws.URL, testProject))
+
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.ExpectCode("/5f923865/README.md?bbb_token=TOKEN-4BBB", 403)
+	dl.ExpectCode("/5f923865/README.md?aaa_token=TOKEN-4AAA", 200)
+	dl.ExpectSha1("/5f923865/README.md?aaa_token=TOKEN-4AAA", "5f7af35c185a9e5face2f4afb6d7c4f00328d04c")
+
+	dl.Header.Add("AAA-TOKEN", "TOKEN-4AAA")
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.Header.Add("BBB-TOKEN", "TOKEN-4BBB")
+	dl.ExpectCode("/5f923865/README.md", 200)
+	dl.ExpectSha1("/5f923865/README.md", "5f7af35c185a9e5face2f4afb6d7c4f00328d04c")
+
+	dl.Header = make(http.Header) // clear
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.Header.Set("Cookie", "alpha=1")
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.Header.Set("Cookie", "alpha=1; beta=2")
+	dl.ExpectCode("/5f923865/README.md", 403)
+	dl.Header.Set("Cookie", "alpha=1; _gitlab_session=COOKIE-CCC; beta=2")
+	dl.ExpectCode("/5f923865/README.md", 200)
+	dl.ExpectSha1("/5f923865/README.md", "5f7af35c185a9e5face2f4afb6d7c4f00328d04c")
 }