// Handler for raw blob downloads // // Blobs are read via `git cat-file ...` with first querying authentication // backend about download-access permission for containing repository. package git import ( "gitlab.com/gitlab-org/gitlab-workhorse/internal/api" "gitlab.com/gitlab-org/gitlab-workhorse/internal/helper" "bufio" "errors" "fmt" "io" "log" "net/http" "net/url" "regexp" "strings" ) // HTTP handler for `.../raw/<ref>/path` func GetBlobRaw(a *api.API) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handleGetBlobRaw(a, w, r) }) } var rawRe = regexp.MustCompile(`/raw/`) func handleGetBlobRaw(a *api.API, w http.ResponseWriter, r *http.Request) { // Extract project & refpath // <project>/raw/branch/file -> <project>, branch/file u := r.URL rawLoc := rawRe.FindStringIndex(u.Path) if rawLoc == nil { helper.Fail500(w, r, errors.New("extract project name")) return } project := u.Path[:rawLoc[0]] refpath := u.Path[rawLoc[1]:] // Prepare userinfo var user *url.Userinfo username, password, ok := r.BasicAuth() if ok { user = url.UserPassword(username, password) } // Query download access auth for this project authReply := a.VerifyDownloadAccess(project, user, u.RawQuery, r.Header) if authReply.RepoPath == "" { // access denied - copy auth reply to client in full - // there are HTTP code and other headers / body relevant for // about why access was denied. for k, v := range authReply.RawReply.HeaderMap { w.Header()[k] = v } w.WriteHeader(authReply.RawReply.Code) // NOTE do not consume authReply.RawReply.Body with io.Copy() - // this way it will be read one time only and next reads will be empty. _, err := w.Write(authReply.RawReply.Body.Bytes()) if err != nil { helper.LogError(r, fmt.Errorf("writing authReply.RawReply.Body: %v", err)) } return } // Access granted - we can emit the blob emitBlob(w, authReply.RepoPath, refpath, r) } // Emit content of blob located at <ref>/path (jointly denoted as 'refpath') to output func emitBlob(w http.ResponseWriter, repopath string, refpath string, r *http.Request) { // Communicate with `git cat-file --batch` trying refs from longest // to shortest prefix in refpath. This way we find longest-match for // ref and get blob sha1 and content in the end. queryCmd := gitCommand("", "", "git", "--git-dir="+repopath, "cat-file", "--batch") queryStdin, err := queryCmd.StdinPipe() if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; stdin: %v", err)) return } defer queryStdin.Close() queryStdout, err := queryCmd.StdoutPipe() if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; stdout: %v", err)) return } defer queryStdout.Close() queryReader := bufio.NewReader(queryStdout) err = queryCmd.Start() if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; start: %v", err)) return } defer helper.CleanUpProcessGroup(queryCmd) // refpath components as vector refpathv := strings.Split(refpath, "/") // scan from right to left and try to change '/' -> ':' and see if it // creates a correct git object name. If it does - we read object // content which follows. var sha1, type_ string var size int64 for i := len(refpathv); i > 0; i-- { ref := strings.Join(refpathv[:i], "/") path := strings.Join(refpathv[i:], "/") _, err := fmt.Fprintf(queryStdin, "%s:%s\n", ref, path) if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; write: %v", err)) return } reply, err := queryReader.ReadString('\n') if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; read: %v", err)) return } // <object> SP missing LF if strings.HasSuffix(reply, " missing\n") { continue } // <sha1> SP <type> SP <size> LF _, err = fmt.Sscanf(reply, "%s %s %d\n", &sha1, &type_, &size) if err != nil { helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; reply parse: %v", err)) return } if type_ != "blob" { log.Printf("git cat-file --batch-check; %v is not blob (is %v)", sha1, type_) sha1 = "" // so it will return 404 } // git object found break } // Blob not found -> 404 if sha1 == "" { http.Error(w, fmt.Sprintf("Blob for %v not found", refpath), http.StatusNotFound) return } // Blob found - start writing response w.Header().Set("Content-Disposition", "inline") w.Header().Set("Content-Transfer-Encoding", "binary") w.Header().Set("Content-Length", fmt.Sprintf("%d", size)) w.Header().Set("X-Content-Type-Options", "nosniff") // net/http sniffs stream and automatically detects and sets // Content-Type header. We do not have to do it ourselves. w.Header().Set("Cache-Control", "private") // Rails sets this for IE compatibility w.Header().Set("ETag", fmt.Sprintf(`"%s"`, sha1)) w.WriteHeader(http.StatusOK) // Don't bother with HTTP 500 from this point on, just return // XXX better use queryStdout instead of queryReader, but we could be // holding some tail bytes in queryReader after chat phase _, err = io.CopyN(w, queryReader, size) if err != nil { helper.LogError(r, fmt.Errorf("io.CopyN: %v", err)) return } // close git stdin explicitly, so it can exit cleanly err = queryStdin.Close() if err != nil { helper.LogError(r, fmt.Errorf("queryStdin.Close: %v", err)) return } err = queryCmd.Wait() if err != nil { helper.LogError(r, fmt.Errorf("wait: %v", err)) return } }