// Handler for raw blob downloads
//
// Blobs are read via `git cat-file ...`, after first querying the
// authentication backend for download-access permission to the containing
// repository.

package git

import (
	"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
	"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
	"bufio"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)

// GetBlobRaw returns the HTTP handler for `.../raw/<ref>/path` requests.
// Each request is passed to handleGetBlobRaw together with a.
func GetBlobRaw(a *api.API) http.Handler {
	serve := func(w http.ResponseWriter, r *http.Request) {
		handleGetBlobRaw(a, w, r)
	}
	return http.HandlerFunc(serve)
}

// rawRe matches the literal `/raw/` path segment. handleGetBlobRaw uses its
// first match to split the request path into project and refpath.
var rawRe = regexp.MustCompile(`/raw/`)

// handleGetBlobRaw serves a single raw-blob request.
//
// The request path is split around the first `/raw/` into the project and
// the refpath. The authentication backend is then asked, through a, whether
// the requester may download from that project. If it answers with a
// repository path the blob is emitted; otherwise the backend's reply is
// relayed to the client unchanged. A path without `/raw/` yields HTTP 500.
func handleGetBlobRaw(a *api.API, w http.ResponseWriter, r *http.Request) {
	// Split the path into project & refpath:
	// <project>/raw/branch/file -> <project>, branch/file
	reqURL := r.URL
	loc := rawRe.FindStringIndex(reqURL.Path)
	if loc == nil {
		helper.Fail500(w, r, errors.New("extract project name"))
		return
	}
	project, refpath := reqURL.Path[:loc[0]], reqURL.Path[loc[1]:]

	// Basic-auth credentials, when present, are forwarded as userinfo;
	// otherwise user stays nil.
	var user *url.Userinfo
	if username, password, ok := r.BasicAuth(); ok {
		user = url.UserPassword(username, password)
	}

	// Ask the auth backend about download access for this project
	authReply := a.VerifyDownloadAccess(project, user, reqURL.RawQuery, r.Header)
	if authReply.RepoPath != "" {
		// Access granted - we can emit the blob
		emitBlob(w, authReply.RepoPath, refpath, r)
		return
	}

	// Access denied - relay the auth reply to the client in full: its
	// status code, headers and body explain why access was denied.
	for name, values := range authReply.RawReply.HeaderMap {
		w.Header()[name] = values
	}
	w.WriteHeader(authReply.RawReply.Code)

	// NOTE the body is taken with Bytes() and not consumed with io.Copy():
	// io.Copy would drain it, leaving any later read of the reply empty.
	if _, err := w.Write(authReply.RawReply.Body.Bytes()); err != nil {
		helper.LogError(r, fmt.Errorf("writing authReply.RawReply.Body: %v", err))
	}
}

// emitBlob writes the content of the blob located at <ref>/path (jointly
// denoted as 'refpath') in the repository at repopath to w.
//
// A ref may itself contain '/', so the boundary between ref and path is not
// known up front. Candidates "<ref>:<path>" are therefore offered to
// `git cat-file --batch` from the longest ref prefix to the shortest, and
// the first one git does not report as missing is used.
//
// Outcomes:
//   - 404 if refpath contains a newline, if no candidate names an object,
//     or if the object found is not a blob;
//   - 500 if git cannot be started, written to, read from, or its reply
//     cannot be parsed;
//   - 200 with the blob content otherwise. Errors that occur after the
//     status line has been sent can only be logged.
func emitBlob(w http.ResponseWriter, repopath string, refpath string, r *http.Request) {
	// `git cat-file --batch` takes one object name per input line. The
	// caller derives refpath from the decoded URL path, so it may carry a
	// newline (`%0A`); git would see that as several queries and the
	// one-request/one-reply exchange below would go out of sync. Such a
	// name cannot be looked up this way, so report it as not found
	// without starting git at all.
	if strings.Contains(refpath, "\n") {
		http.Error(w, fmt.Sprintf("Blob for %v not found", refpath), http.StatusNotFound)
		return
	}

	// Communicate with `git cat-file --batch` trying refs from longest
	// to shortest prefix in refpath. This way we find longest-match for
	// ref and get blob sha1 and content in the end.
	queryCmd := gitCommand("", "", "git", "--git-dir="+repopath, "cat-file", "--batch")
	queryStdin, err := queryCmd.StdinPipe()
	if err != nil {
		helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; stdin: %v", err))
		return
	}
	// Safety net for early returns; the success path closes stdin
	// explicitly further down.
	defer queryStdin.Close()
	queryStdout, err := queryCmd.StdoutPipe()
	if err != nil {
		helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; stdout: %v", err))
		return
	}
	defer queryStdout.Close()
	queryReader := bufio.NewReader(queryStdout)

	err = queryCmd.Start()
	if err != nil {
		helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; start: %v", err))
		return
	}
	defer helper.CleanUpProcessGroup(queryCmd)

	// refpath components as vector
	refpathv := strings.Split(refpath, "/")

	// scan from right to left and try to change '/' -> ':' and see if it
	// creates a correct git object name. If it does - we read object
	// content which follows.
	var sha1, objType string
	var size int64
	for i := len(refpathv); i > 0; i-- {
		ref := strings.Join(refpathv[:i], "/")
		path := strings.Join(refpathv[i:], "/")
		if _, err := fmt.Fprintf(queryStdin, "%s:%s\n", ref, path); err != nil {
			helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; write: %v", err))
			return
		}

		reply, err := queryReader.ReadString('\n')
		if err != nil {
			helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; read: %v", err))
			return
		}

		// <object> SP missing LF
		if strings.HasSuffix(reply, " missing\n") {
			continue
		}

		// <sha1> SP <type> SP <size> LF
		if _, err := fmt.Sscanf(reply, "%s %s %d\n", &sha1, &objType, &size); err != nil {
			helper.Fail500(w, r, fmt.Errorf("git cat-file --batch; reply parse: %v", err))
			return
		}

		if objType != "blob" {
			log.Printf("git cat-file --batch; %v is not blob (is %v)", sha1, objType)
			sha1 = "" // so it will return 404
		}

		// git object found
		break
	}

	// Blob not found -> 404
	if sha1 == "" {
		http.Error(w, fmt.Sprintf("Blob for %v not found", refpath), http.StatusNotFound)
		return
	}

	// Blob found - start writing response
	w.Header().Set("Content-Disposition", "inline")
	w.Header().Set("Content-Transfer-Encoding", "binary")
	w.Header().Set("Content-Length", fmt.Sprintf("%d", size))
	w.Header().Set("X-Content-Type-Options", "nosniff")
	// net/http sniffs stream and automatically detects and sets
	// Content-Type header. We do not have to do it ourselves.

	w.Header().Set("Cache-Control", "private") // Rails sets this for IE compatibility
	w.Header().Set("ETag", fmt.Sprintf(`"%s"`, sha1))

	w.WriteHeader(http.StatusOK) // Don't bother with HTTP 500 from this point on, just return
	// XXX better use queryStdout instead of queryReader, but we could be
	// holding some tail bytes in queryReader after chat phase
	_, err = io.CopyN(w, queryReader, size)
	if err != nil {
		helper.LogError(r, fmt.Errorf("io.CopyN: %v", err))
		return
	}

	// close git stdin explicitly, so it can exit cleanly
	err = queryStdin.Close()
	if err != nil {
		helper.LogError(r, fmt.Errorf("queryStdin.Close: %v", err))
		return
	}

	err = queryCmd.Wait()
	if err != nil {
		helper.LogError(r, fmt.Errorf("wait: %v", err))
		return
	}
}