Rewrite in Go

This is a more-or-less 1-to-1 port of git-backup to Go. There are things we handle a bit differently: - there is a separate type for Sha1 - conversion of repo paths to git references is now more robust with respect to avoiding constructs that are not allowed in git, like ".." or ".lock" https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34 The rewrite happened because we need to optimize restore, and for e.g. the parallelizing part it should be convenient to use goroutines and channels. I'm not very comfortable with how error handling is done, because contrary to what the canonical Go way seems to be, in a lot of places it still looks to me that exceptions are a better idea compared to just error codes, though in many places just error codes are better and make more sense. Probably there will be fewer exceptions over time once the code starts to be a collaborating set of goroutines with communication done via channels. Still a lot of Python habits on my side. And as a bonus we now have end-to-end pull/restore tests...

Rewrite in Go
This is a more-or-less 1-to-1 port of git-backup to Go. There are things we handle a bit differently: - there is a separate type for Sha1 - conversion of repo paths to git references is now more robust with respect to avoiding constructs that are not allowed in git, like ".." or ".lock" https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34 The rewrite happened because we need to optimize restore, and for e.g. the parallelizing part it should be convenient to use goroutines and channels. I'm not very comfortable with how error handling is done, because contrary to what the canonical Go way seems to be, in a lot of places it still looks to me that exceptions are a better idea compared to just error codes, though in many places just error codes are better and make more sense. Probably there will be fewer exceptions over time once the code starts to be a collaborating set of goroutines with communication done via channels. Still a lot of Python habits on my side. And as a bonus we now have end-to-end pull/restore tests...
28986e0e · Kirill Smelkov · a6cfe210 · 28986e0e · 28986e0e · 28986e0e
Commit 28986e0e authored Jul 06, 2016 by Kirill Smelkov
79 changed files
--- a/.gitignore
+++ b/.gitignore
+git-backup
--- a/README.rst
+++ b/README.rst
@@ -50,12 +50,12 @@ Backup workflow is:
     $ git pull ...


-Please see `git-backup`__ source with technical overview on how it works.
+Please see `git-backup.go`__ source with technical overview on how it works.

 We also provide convenience program to pull/restore backup data for a GitLab
 instance into/from git-backup managed repository. See `contrib/gitlab-backup`__
 for details.


-__ git-backup
+__ git-backup.go
 __ contrib/gitlab-backup
--- a/error.go
+++ b/error.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Exception-style errors
+package main
+
+import (
+    "fmt"
+    "runtime"
+    "strings"
+)
+
+// error type which is raised by raise(arg)
+//
+// Errors form a singly linked chain through link; Error() renders the chain
+// starting at the receiver and joins the elements with ": ".
+type Error struct {
+    arg  interface{} // value carried by this chain element (a Frame is rendered as function name)
+    link *Error // chain of linked Error(s) - see e.g. errcontext()
+}
+
+func (e *Error) Error() string {
+    msgv := []string{}
+    msg := ""
+    for e != nil {
+        // TODO(go1.7) -> runtime.Frame  (see xtraceback())
+        if f, ok := e.arg.(Frame); ok {
+            //msg = f.Function
+            //msg = fmt.Sprintf("%s (%s:%d)", f.Function, f.File, f.Line)
+            msg = strings.TrimPrefix(f.Name(), _errorpkgdot) // XXX -> better prettyfunc
+        } else {
+            msg = fmt.Sprint(e.arg)
+        }
+        msgv = append(msgv, msg)
+        e = e.link
+    }
+
+    return strings.Join(msgv, ": ")
+}
+
+// turn any value into Error
+// if v is already Error - it stays the same
+// otherwise new Error is created
+func aserror(v interface{}) *Error {
+    if e, ok := v.(*Error); ok {
+        return e
+    }
+    return &Error{v, nil}
+}
+
+// raise propagates arg to upper levels by panicking with it converted to *Error.
+func raise(arg interface{}) {
+    e := aserror(arg)
+    panic(e)
+}
+
+// raisef raises a string formatted in fmt.Sprintf style.
+func raisef(format string, a ...interface{}) {
+    msg := fmt.Sprintf(format, a...)
+    raise(msg)
+}
+
+// raiseif raises err unless it is nil.
+//
+// NOTE err can be != nil even if it holds a typed nil pointer:
+//   var obj *T;
+//   err = obj
+//   err != nil     is true
+// such a value is raised as well - only the plain interface nil is ignored.
+func raiseif(err error) {
+    if err == nil {
+        return
+    }
+    raise(err)
+}
+
+// _errcatch classifies a value obtained from recover().
+//
+// - nil (no panic in progress)   -> nil is returned
+// - non-nil *Error               -> it is returned
+// - anything else                -> it is not ours, so it is re-panicked as is
+func _errcatch(r interface{}) *Error {
+    if r == nil {
+        return nil
+    }
+    e, ok := r.(*Error)
+    if !ok || e == nil {
+        panic(r)
+    }
+    return e
+}
+
+// errcatch catches a raised error and passes it to f.
+// f is not called when nothing was raised.
+// must be called under defer
+func errcatch(f func(e *Error)) {
+    // recover() has to be called right here, in the deferred function itself
+    if e := _errcatch(recover()); e != nil {
+        f(e)
+    }
+}
+
+// erronunwind lets the caller be notified while error unwinding is happening.
+// f is called with the error being unwound; the error it returns is reraised.
+// see also: errcontext()
+// must be called under defer
+func erronunwind(f func(e *Error) *Error) {
+    // cannot delegate to errcatch(...)
+    // as recover() works only in first-level called functions
+    if e := _errcatch(recover()); e != nil {
+        panic(f(e))
+    }
+}
+
+// errcontext provides context that is automatically added to an error on unwinding.
+// f is called only if error unwinding is happening; its result is put in
+// front of the raised error as "prefix" context and the result is reraised.
+// must be called under defer
+func errcontext(f func() interface{}) {
+    caught := _errcatch(recover())
+    if caught != nil {
+        ctx := f()
+        panic(erraddcontext(caught, ctx))
+    }
+}
+
+// erraddcontext returns a new chain head carrying arg, linked in front of e.
+func erraddcontext(e *Error, arg interface{}) *Error {
+    return &Error{arg: arg, link: e}
+}
+
+func _myfuncname(nskip int) string {
+    pcv := [1]uintptr{}
+    runtime.Callers(nskip, pcv[:])
+    f := runtime.FuncForPC(pcv[0])
+    if f == nil {
+        return ""
+    }
+    return f.Name()
+}
+
+// get name of currently running function (caller of myfuncname())
+// name is fully qualified package/name.function(.x)
+//
+// The skip value 3 is counted from runtime.Callers itself:
+// Callers -> _myfuncname -> myfuncname -> caller.
+func myfuncname() string {
+    return _myfuncname(3)
+}
+
+// get name of currently running function's package
+// package is fully qualified package/name
+//
+// "" is returned when the calling function cannot be resolved.
+// Panics if the resolved function name contains no package/function delimiter.
+func mypkgname() string {
+    myfunc := _myfuncname(3)
+    if myfunc == "" {
+        return ""
+    }
+    // NOTE dots are escaped by go as %2e only in the last element of package
+    // path; elements before the last '/' can contain plain dots, e.g.
+    //
+    //   lab.nexedi.com/kirr/git-backup/package%2ename.Function
+    //
+    // this way the first dot after the last '/' is the delimiter between
+    // package and function.
+    iafterslash := strings.LastIndexByte(myfunc, '/') + 1 // 0 if there is no '/'
+    idot := strings.IndexByte(myfunc[iafterslash:], '.')
+    if idot == -1 {
+        panic(fmt.Errorf("funcname %q is not fully qualified", myfunc))
+    }
+    return myfunc[:iafterslash+idot]
+}
+
+// Frame is one entry of a traceback as produced by xtraceback().
+// TODO(go1.7) goes away in favour of runtime.Frame
+type Frame struct {
+    *runtime.Func // function the pc belongs to, as returned by runtime.FuncForPC(pc)
+    pc  uintptr // program counter as returned by runtime.Callers
+}
+
+// xtraceback returns the current calling traceback as []Frame.
+// nskip meaning: the same as in runtime.Callers()
+// TODO(go1.7) []Frame -> []runtime.Frame  (via runtime.CallersFrames)
+func xtraceback(nskip int) []Frame {
+    // collect all callers: keep doubling the buffer until
+    // runtime.Callers no longer fills it completely
+    var pcv []uintptr
+    for size := 2; ; size *= 2 {
+        pcv = make([]uintptr, size)
+        n := runtime.Callers(nskip+1, pcv) // +1 to skip xtraceback itself
+        if n < size {
+            pcv = pcv[:n]
+            break
+        }
+    }
+
+    // pcv -> frames
+    framev := make([]Frame, len(pcv))
+    for i, pc := range pcv {
+        framev[i] = Frame{runtime.FuncForPC(pc), pc}
+    }
+
+    return framev
+}
+
+// names used to recognize and prettify frames that belong to this package;
+// they are computed once in init() from the name of init's own package.
+var (
+    _errorpkgname string // package name under which error.go lives
+    _errorpkgdot  string // errorpkg.
+    _errorraise   string // errorpkg.raise
+)
+
+// init fills in the _error* names above.
+// mypkgname() must be called directly from here, as it reports the package
+// of its immediate caller.
+func init() {
+    _errorpkgname = mypkgname()
+    _errorpkgdot  = _errorpkgname + "."
+    _errorraise   = _errorpkgname + ".raise"
+}
+
+// erraddcallingcontext adds calling context to error.
+//
+// The current traceback is walked from the innermost frame outwards.
+// Everything up to and including the first raise*() frame is dropped; after
+// that every function name is put in front of e as context, until topfunc is
+// reached (topfunc itself is not included; "" means no limit).
+// Functions whose name ends with "_" are treated as intermediates and skipped.
+// see also: erraddcontext()
+func erraddcallingcontext(topfunc string, e *Error) *Error {
+    afterraise := false
+    for _, fr := range xtraceback(2) {
+        name := fr.Name()
+
+        // do not show anything after raise*()
+        if !afterraise {
+            afterraise = strings.HasPrefix(name, _errorraise)
+            continue
+        }
+
+        // do not go beyond topfunc
+        if topfunc != "" && name == topfunc {
+            break
+        }
+
+        // skip intermediates
+        if strings.HasSuffix(name, "_") { // XXX -> better skipfunc
+            continue
+        }
+
+        e = &Error{fr, e}
+    }
+
+    return e
+}
--- a/error_test.go
+++ b/error_test.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+package main
+
+import (
+    "strings"
+    "testing"
+)
+
+// do_raise1 raises 1.
+// Its name is part of the calling context expected by TestErrAddCallingContext.
+func do_raise1() {
+    raise(1)
+}
+
+// verify that a raised value is caught by errcatch as a single-element chain
+func TestErrRaiseCatch(t *testing.T) {
+    defer errcatch(func(caught *Error) {
+        if caught.arg != 1 || caught.link != nil {
+            t.Fatalf("error caught but unexpected: %#v  ; want {1, nil}", caught)
+        }
+    })
+    do_raise1()
+    t.Fatal("error not caught")
+}
+
+// verifyErrChain checks that the chain starting at e carries exactly argv
+// as its .arg values, in order; any mismatch fails the test.
+func verifyErrChain(t *testing.T, e *Error, argv ...interface{}) {
+    n := 0
+    for cur := e; cur != nil; cur = cur.link {
+        if n >= len(argv) {
+            t.Fatal("too long error chain")
+        }
+        if cur.arg != argv[n] {
+            t.Fatalf("error caught but unexpected %vth arg: %v  ; want %v", n, cur.arg, argv[n])
+        }
+        n++
+    }
+    if n < len(argv) {
+        t.Fatal("too small error chain")
+    }
+}
+
+// do_onunwind1 installs an unwind hook but raises nothing:
+// the hook must not be called.
+func do_onunwind1(t *testing.T) {
+    defer erronunwind(func(e *Error) *Error {
+        t.Fatal("on unwind called without raise")
+        return nil
+    })
+}
+
+// do_onunwind2 raises 1 under an unwind hook which puts 2 in front of
+// the error being unwound.
+func do_onunwind2() {
+    defer erronunwind(func(e *Error) *Error {
+        return &Error{2, e}
+    })
+    do_raise1()
+}
+
+// verify erronunwind: the hook is not called without raise, and on raise
+// the error it returns is the one which is reraised.
+func TestErrOnUnwind(t *testing.T) {
+    defer errcatch(func(e *Error) {
+        verifyErrChain(t, e, 2, 1)
+    })
+    do_onunwind1(t)
+    do_onunwind2()
+    t.Fatal("error not caught")
+}
+
+// do_context1 installs an error context but raises nothing:
+// the context function must not be called.
+func do_context1(t *testing.T) {
+    defer errcontext(func() interface{} {
+        t.Fatal("on context called without raise")
+        return nil
+    })
+}
+
+// do_context2 raises 1 under an error context which provides 3.
+func do_context2() {
+    defer errcontext(func() interface{} {
+        return 3
+    })
+    do_raise1()
+}
+
+// verify errcontext: the context function is not called without raise, and
+// on raise its result is put in front of the raised error.
+func TestErrContext(t *testing.T) {
+    defer errcatch(func(e *Error) {
+        verifyErrChain(t, e, 3, 1)
+    })
+    do_context1(t)
+    do_context2()
+    t.Fatal("error not caught")
+}
+
+func TestMyFuncName(t *testing.T) {
+    myfunc := myfuncname()
+    // go test changes full package name (putting filesystem of the tree into ti)
+    // thus we check only for suffix
+    wantsuffix := ".TestMyFuncName"
+    if !strings.HasSuffix(myfunc, wantsuffix) {
+        t.Errorf("myfuncname() -> %v  ; want *%v", myfunc, wantsuffix)
+    }
+}
+
+// do_raise11 raises via do_raise1, adding one more level to the call stack.
+// Its name is part of the calling context expected by TestErrAddCallingContext.
+func do_raise11() {
+    do_raise1()
+}
+
+// verify erraddcallingcontext: functions between raise and the test function
+// itself (not included) show up as error context, outermost first.
+func TestErrAddCallingContext(t *testing.T) {
+    myfunc := myfuncname()
+    defer errcatch(func(e *Error) {
+        // the deferred call runs while the panicking stack is still in place,
+        // so the traceback taken here still contains do_raise11 & do_raise1
+        e = erraddcallingcontext(myfunc, e)
+        msg, want := e.Error(), "do_raise11: do_raise1: 1"
+        if msg != want {
+            t.Fatalf("err + calling context: %q  ; want %q", msg, want)
+        }
+    })
+    do_raise11()
+    t.Fatal("error not caught")
+}
--- a/git-backup
+++ b/git-backup
--- a/git-backup.go
+++ b/git-backup.go
--- a/git-backup_test.go
+++ b/git-backup_test.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+package main
+
+import (
+    "fmt"
+    "io/ioutil"
+    "os"
+    "path/filepath"
+    "regexp"
+    "strings"
+    "syscall"
+    "testing"
+)
+
+// xgetcwd returns the current working directory; on error the test is failed.
+func xgetcwd(t *testing.T) string {
+    dir, err := os.Getwd()
+    if err != nil {
+        t.Fatal(err)
+    }
+    return dir
+}
+
+// xchdir changes the current working directory to dir; on error the test is failed.
+func xchdir(t *testing.T, dir string) {
+    if err := os.Chdir(dir); err != nil {
+        t.Fatal(err)
+    }
+}
+
+// verify end-to-end pull-restore
+//
+// The test
+//   - pulls testdata/1 into a freshly created backup repository,
+//   - prunes it and checks that the pulled tag objects are gone as objects,
+//   - restores the backup and checks that files, refs and objects of every
+//     restored git repository are the same as in the original,
+//   - checks that pulling the corrupt testdata/2 is refused.
+func TestPullRestore(t *testing.T) {
+    // if something raises -> don't let testing panic - report it as proper error with context.
+    here := myfuncname()
+    defer errcatch(func(e *Error) {
+        e = erraddcallingcontext(here, e)
+
+        // add file:line for failing code inside testing function - so we have exact context to debug
+        failedat := ""
+        for _, f := range xtraceback(1) {
+            if f.Name() == here {
+                // TODO(go1.7) -> f.File, f.Line  (f becomes runtime.Frame)
+                file, line := f.FileLine(f.pc - 1)
+                failedat = fmt.Sprintf("%s:%d", filepath.Base(file), line)
+                break
+            }
+        }
+        if failedat == "" {
+            panic(fmt.Errorf("cannot lookup failedat for %s", here))
+        }
+
+        t.Errorf("%s: %v", failedat, e)
+    })
+
+    workdir, err := ioutil.TempDir("", "t-git-backup")
+    if err != nil {
+        t.Fatal(err)
+    }
+    defer os.RemoveAll(workdir)
+
+    mydir := xgetcwd(t)
+    xchdir(t, workdir)
+    defer xchdir(t, mydir)
+
+    // -test.v -> verbosity of git-backup
+    if testing.Verbose() {
+        verbose = 1
+    } else {
+        verbose = 0
+    }
+
+    // init backup repository
+    xgit("init", "--bare", "backup.git")
+    xchdir(t, "backup.git")
+
+    // pull from testdata
+    my1 := mydir + "/testdata/1"
+    cmd_pull([]string{my1+":b1"})
+
+    // prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
+    xgit("prune")
+
+    // verify backup repo is all ok
+    xgit("fsck")
+
+    // verify that just pulled tag objects are now gone after pruning -
+    // - they become not directly git-present. The only possibility to
+    // get them back is via recreating from encoded commit objects.
+    tags := []string{"11e67095628aa17b03436850e690faea3006c25d",
+                     "ba899e5639273a6fa4d50d684af8db1ae070351e",
+                     "7124713e403925bc772cd252b0dec099f3ced9c5",
+                     "f735011c9fcece41219729a33f7876cd8791f659"}
+    for _, tag := range tags {
+        gerr, _, _ := git("cat-file", "-p", tag)
+        if gerr == nil {
+            t.Fatalf("tag %s still present in backup.git after git-prune", tag)
+        }
+    }
+
+    // restore backup
+    work1 := workdir + "/1"
+    cmd_restore([]string{"HEAD", "b1:"+work1})
+
+    // verify files restored to the same as original
+    gerr, diff, _ := git("diff", "--no-index", "--raw", "--exit-code", my1, work1)
+    // 0 - no diff, 1 - has diff, 2 - problem
+    if gerr != nil && gerr.Sys().(syscall.WaitStatus).ExitStatus() > 1 {
+        t.Fatal(gerr)
+    }
+    gitObjectsRe := regexp.MustCompile(`\.git/objects/`)
+    for _, diffline := range strings.Split(diff, "\n") {
+        // :srcmode dstmode srcsha1 dstsha1 status\tpath
+        _, path, err := headtail(diffline, "\t")
+        if err != nil {
+            t.Fatalf("restorecheck: cannot parse diff line %q", diffline)
+        }
+        // git objects can be represented differently (we check them later)
+        if gitObjectsRe.FindString(path) != "" {
+            continue
+        }
+        t.Fatal("restorecheck: unexpected diff:", diffline)
+    }
+
+    // verify git objects restored to the same as original
+    err = filepath.Walk(my1, func(path string, info os.FileInfo, err error) error {
+        // any error -> stop
+        if err != nil {
+            return err
+        }
+
+        // non *.git/ -- not interesting
+        if !(info.IsDir() && strings.HasSuffix(path, ".git")) {
+            return nil
+        }
+
+        // found git repo - check refs & objects in original and restored are exactly the same,
+        var R = [2]struct{ path, reflist, revlist string }{
+            {path: path},                       // original
+            {path: reprefix(my1, work1, path)}, // restored
+        }
+
+        // NOTE iterate by index: `for _, repo := range R` would hand us a
+        // copy of each element, so the results stored below would be lost
+        // and the comparisons after the loop would compare empty strings.
+        for i := range R {
+            repo := &R[i]
+            // fsck just in case
+            xgit("--git-dir=" + repo.path, "fsck")
+            // NOTE for-each-ref sorts output by refname
+            repo.reflist = xgit("--git-dir=" + repo.path, "for-each-ref")
+            // NOTE rev-list emits objects in reverse chronological order,
+            //      starting from refs roots which are also ordered by refname
+            repo.revlist = xgit("--git-dir=" + repo.path, "rev-list", "--all", "--objects")
+        }
+
+        if R[0].reflist != R[1].reflist {
+            t.Fatalf("restorecheck: %q restored with different reflist (in %q)", R[0].path, R[1].path)
+        }
+
+        if R[0].revlist != R[1].revlist {
+            t.Fatalf("restorecheck: %q restored with differrent objects (in %q)", R[0].path, R[1].path)
+        }
+
+        // .git verified - no need to recurse
+        return filepath.SkipDir
+    })
+
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    // now try to pull corrupt repo - pull should refuse if transferred pack contains bad objects
+    my2 := mydir + "/testdata/2"
+    func() {
+        defer errcatch(func(e *Error) {
+            // it ok - pull should raise
+        })
+        cmd_pull([]string{my2+":b2"})
+        t.Fatal("fetching from corrupt.git did not complain")
+    }()
+}
+
+// verify reporef_split() on a table of reporef -> (repo, ref) examples
+func TestRepoRefSplit(t *testing.T) {
+    type testcase struct{ reporef, repo, ref string }
+    cases := []testcase{
+        {"kirr/wendelin.core.git/heads/master", "kirr/wendelin.core.git", "heads/master"},
+        {"kirr/erp5.git/backup/x/master+erp5-data-notebook", "kirr/erp5.git", "backup/x/master+erp5-data-notebook"},
+        {"tiwariayush/Discussion%20Forum%20.git/...", "tiwariayush/Discussion Forum .git", "..."},
+        {"tiwariayush/Discussion%20Forum+.git/...", "tiwariayush/Discussion Forum+.git", "..."},
+        {"tiwariayush/Discussion%2BForum+.git/...", "tiwariayush/Discussion+Forum+.git", "..."},
+    }
+
+    for i := range cases {
+        tc := &cases[i]
+        repo, ref := reporef_split(tc.reporef)
+        if repo == tc.repo && ref == tc.ref {
+            continue
+        }
+        t.Errorf("reporef_split(%q) -> %q %q  ; want %q %q", tc.reporef, repo, ref, tc.repo, tc.ref)
+    }
+}
--- a/git.go
+++ b/git.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Run git subprocess
+package main
+
+import (
+    "bytes"
+    "fmt"
+    "os"
+    "os/exec"
+    "strings"
+)
+
+// how/whether to redirect stdio of spawned process
+type StdioRedirect int
+
+const (
+    PIPE StdioRedirect = iota // connect stdio channel via PIPE to parent (default value)
+    DontRedirect // let the subprocess use parent's own stdout/stderr (see _git())
+)
+
+// RunWith is the context a git subprocess is run with.
+// The zero value means: no stdin, stdout & stderr captured and stripped,
+// environment left as exec.Command sets it up by default.
+type RunWith struct {
+    stdin  string // "" -> nothing is connected to subprocess stdin
+    stdout StdioRedirect     // PIPE | DontRedirect
+    stderr StdioRedirect     // PIPE | DontRedirect
+    raw    bool              // !raw -> stdout, stderr are stripped
+    env    map[string]string // !nil -> subprocess environment setup from env
+}
+
+// run `git *argv` -> error, stdout, stderr
+func _git(argv []string, ctx RunWith) (err error, stdout, stderr string) {
+    debugf("git %s", strings.Join(argv, " "))
+
+    cmd := exec.Command("git", argv...)
+    stdoutBuf := bytes.Buffer{}
+    stderrBuf := bytes.Buffer{}
+
+    if ctx.stdin != "" {
+        cmd.Stdin = strings.NewReader(ctx.stdin)
+    }
+
+    switch ctx.stdout {
+    case PIPE:
+        cmd.Stdout = &stdoutBuf
+    case DontRedirect:
+        cmd.Stdout = os.Stdout
+    default:
+        panic("git: stdout redirect mode invalid")
+    }
+
+    switch ctx.stderr {
+    case PIPE:
+        cmd.Stderr = &stderrBuf
+    case DontRedirect:
+        cmd.Stderr = os.Stderr
+    default:
+        panic("git: stderr redirect mode invalid")
+    }
+
+    if ctx.env != nil {
+        env := []string{}
+        for k, v := range ctx.env {
+            env = append(env, k+"="+v)
+        }
+        cmd.Env = env
+    }
+
+    err = cmd.Run()
+    stdout = String(stdoutBuf.Bytes())
+    stderr = String(stderrBuf.Bytes())
+
+    if !ctx.raw {
+        // prettify stdout (e.g. so that 'sha1\n' becomes 'sha1' and can be used directly
+        stdout = strings.TrimSpace(stdout)
+        stderr = strings.TrimSpace(stderr)
+    }
+
+    return err, stdout, stderr
+}
+
+// error a git command returned
+//
+// It combines the context the command was run with and the exit error
+// reported by exec. Both embedded types provide Error(); the one to use is
+// defined explicitly on *GitError.
+type GitError struct {
+    GitErrContext
+    *exec.ExitError
+}
+
+// GitErrContext describes a git invocation for error reporting purposes.
+type GitErrContext struct {
+    argv   []string // arguments git was run with
+    stdin  string // what was fed to git's stdin ("" - nothing)
+    stdout string // stdout as returned by _git()
+    stderr string // stderr as returned by _git()
+}
+
+// Error renders the git invocation context; when git printed nothing to
+// stderr "(failed)" is added so that the message still says what happened.
+func (e *GitError) Error() string {
+    msg := e.GitErrContext.Error()
+    if e.stderr != "" {
+        return msg
+    }
+    return msg + "(failed)\n"
+}
+
+func (e *GitErrContext) Error() string {
+    msg := "git " + strings.Join(e.argv, " ")
+    if e.stdin == "" {
+        msg += " </dev/null\n"
+    } else {
+        msg += " <<EOF\n" + e.stdin
+        if !strings.HasSuffix(msg, "\n") {
+            msg += "\n"
+        }
+        msg += "EOF\n"
+    }
+
+    msg += e.stderr
+    if !strings.HasSuffix(msg, "\n") {
+        msg += "\n"
+    }
+    return msg
+}
+
+// argv -> []string, ctx    (for passing argv + RunWith handy - see git() for details)
+func _gitargv(argv ...interface{}) (argvs []string, ctx RunWith) {
+    ctx_seen := false
+
+    for _, arg := range argv {
+        switch arg := arg.(type) {
+        case string:
+            argvs = append(argvs, arg)
+        default:
+            argvs = append(argvs, fmt.Sprint(arg))
+        case RunWith:
+            if ctx_seen {
+                panic("git: multiple RunWith contexts")
+            }
+            ctx, ctx_seen = arg, true
+        }
+    }
+
+    return argvs, ctx
+}
+
+// git runs `git *argv` and returns err, stdout, stderr
+// - arguments are automatically converted to strings
+// - a RunWith argument is taken as the run context
+// - error is returned only when the git command could run and exited with error status
+// - on other errors an exception is raised
+//
+// NOTE err is concrete *GitError, not error
+func git(argv ...interface{}) (err *GitError, stdout, stderr string) {
+    argvs, ctx := _gitargv(argv...)
+    return git2(argvs, ctx)
+}
+
+// git2 is git() with argv and run context passed explicitly.
+//
+// - err is non-nil only when git could be run and exited with error status;
+//   it then carries the invocation context and the exit error.
+// - any other failure to run git is raised as exception.
+func git2(argv []string, ctx RunWith) (err *GitError, stdout, stderr string) {
+    e, stdout, stderr := _git(argv, ctx)
+    eexec, _ := e.(*exec.ExitError)
+    if e != nil && eexec == nil {
+        // not an exit-status error. NOTE the format must have a verb for e,
+        // or else the cause is rendered as %!(EXTRA ...) noise
+        raisef("git %s : %s", strings.Join(argv, " "), e)
+    }
+    if eexec != nil {
+        err = &GitError{GitErrContext{argv, ctx.stdin, stdout, stderr}, eexec}
+    }
+    return err, stdout, stderr
+}
+
+// xgit runs `git *argv` and returns its stdout.
+// arguments are handled the same way as in git();
+// if git fails an exception is raised.
+func xgit(argv ...interface{}) string {
+    argvs, ctx := _gitargv(argv...)
+    return xgit2(argvs, ctx)
+}
+
+// xgit2 is xgit() with argv and run context passed explicitly.
+func xgit2(argv []string, ctx RunWith) string {
+    failure, out, _ := git2(argv, ctx)
+    if failure != nil {
+        raise(failure)
+    }
+    return out
+}
+
+// xgitSha1 is like xgit(), but stdout is additionally parsed as Sha1.
+func xgitSha1(argv ...interface{}) Sha1 {
+    argvs, ctx := _gitargv(argv...)
+    return xgit2Sha1(argvs, ctx)
+}
+
+// GitSha1Error is the error for when git succeeded, but its output is not valid sha1
+type GitSha1Error struct {
+    GitErrContext
+}
+
+// Error renders the invocation context followed by what was got instead of sha1.
+func (e *GitSha1Error) Error() string {
+    return e.GitErrContext.Error() +
+        fmt.Sprintf("expected valid sha1 (got %q)\n", e.stdout)
+}
+
+// xgit2Sha1 is xgitSha1() with argv and run context passed explicitly.
+//
+// Raised are *GitError when git fails, and *GitSha1Error when git output
+// cannot be parsed as sha1.
+func xgit2Sha1(argv []string, ctx RunWith) Sha1 {
+    failure, out, errout := git2(argv, ctx)
+    if failure != nil {
+        raise(failure)
+    }
+
+    parsed, err := Sha1Parse(out)
+    if err != nil {
+        raise(&GitSha1Error{GitErrContext{argv, ctx.stdin, out, errout}})
+    }
+    return parsed
+}
--- a/gitobjects.go
+++ b/gitobjects.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Git object: Blob Tree Commit Tag
+package main
+
+import (
+    "errors"
+    "fmt"
+    "strings"
+)
+
+// Commit is the part of a git commit object which commit_parse() extracts.
+type Commit struct {
+    tree    Sha1 // from the "tree" header line
+    parentv []Sha1 // from "parent" header lines, in order of appearance
+    msg     string // everything after the first empty line
+}
+
+// Tag is the part of a git tag object which tag_parse() extracts.
+type Tag struct {
+    tagged_type string // from the "type" header line
+    tagged_sha1 Sha1 // from the "object" header line
+    // TODO msg
+}
+
+// TODO Tree (if/when needed)
+// TODO Blob (if/when needed)
+
+// load/parse Commit
+
+// xload_commit loads commit object from git and extracts its .tree .parent[] and .msg
+// Both the parsed commit and its raw content are returned.
+// On any error *CommitLoadError is raised.
+//
+// unfortunately `git show --format=%B` adds newline and optionally wants to
+// reencode commit message and otherwise heavily rely on rev-list traversal
+// machinery -> so we decode commit by hand in a plumbing way.
+func xload_commit(commit_sha1 Sha1) (commit *Commit, commit_raw string) {
+    gerr, raw, _ := git("cat-file", "commit", commit_sha1, RunWith{raw: true})
+    if gerr != nil {
+        raise(&CommitLoadError{commit_sha1, gerr})
+    }
+
+    parsed, err := commit_parse(raw)
+    if err != nil {
+        raise(&CommitLoadError{commit_sha1, err})
+    }
+
+    return parsed, raw
+}
+
+// CommitLoadError is raised by xload_commit() when a commit cannot be loaded or parsed.
+type CommitLoadError struct {
+    commit_sha1 Sha1
+    err         error
+}
+
+// Error reports the commit in question together with the underlying cause.
+func (cle *CommitLoadError) Error() string {
+    return fmt.Sprintf("commit %s: %s", cle.commit_sha1, cle.err)
+}
+
+func commit_parse(commit_raw string) (*Commit, error) {
+    c := Commit{}
+    head, msg, err := headtail(commit_raw, "\n\n")
+    c.msg = msg
+    if err != nil {
+        return nil, errors.New("cannot split to head & msg")
+    }
+
+    headv := strings.Split(head, "\n")
+    if len(headv) == 0 {
+        return nil, errors.New("empty header")
+    }
+    _, err = fmt.Sscanf(headv[0], "tree %s\n", &c.tree)
+    if err != nil {
+        return nil, errors.New("bad tree entry")
+    }
+    for _, h := range headv[1:] {
+        if !strings.HasPrefix(h, "parent ") {
+            break
+        }
+        p := Sha1{}
+        _, err = fmt.Sscanf(h, "parent %s\n", &p)
+        if err != nil {
+            return nil, errors.New("bad parent entry")
+        }
+        c.parentv = append(c.parentv, p)
+    }
+    return &c, nil
+}
+
+// load/parse Tag
+//
+// xload_tag loads tag object from git and parses it.
+// Both the parsed tag and its raw content are returned.
+// On any error *TagLoadError is raised.
+func xload_tag(tag_sha1 Sha1) (tag *Tag, tag_raw string) {
+    gerr, raw, _ := git("cat-file", "tag", tag_sha1, RunWith{raw: true})
+    if gerr != nil {
+        raise(&TagLoadError{tag_sha1, gerr})
+    }
+
+    parsed, err := tag_parse(raw)
+    if err != nil {
+        raise(&TagLoadError{tag_sha1, err})
+    }
+
+    return parsed, raw
+}
+
+// TagLoadError is raised by xload_tag() when a tag cannot be loaded or parsed.
+type TagLoadError struct {
+    tag_sha1 Sha1
+    err      error
+}
+
+// Error reports the tag in question together with the underlying cause.
+func (tle *TagLoadError) Error() string {
+    return fmt.Sprintf("tag %s: %s", tle.tag_sha1, tle.err)
+}
+
+// tag_parse parses raw content of a tag object.
+// Only the leading "object" and "type" header lines are extracted.
+func tag_parse(tag_raw string) (*Tag, error) {
+    tag := &Tag{}
+    if _, err := fmt.Sscanf(tag_raw, "object %s\ntype %s\n", &tag.tagged_sha1, &tag.tagged_type); err != nil {
+        return nil, errors.New("invalid header")
+    }
+    return tag, nil
+}
+
+// parse_lstree_entry parses one entry of `git ls-tree` output.
+// On malformed input zero values and *InvalidLstreeEntry are returned.
+func parse_lstree_entry(lsentry string) (mode uint32, type_ string, sha1 Sha1, filename string, err error) {
+    // <mode> SP <type> SP <object> TAB <file>      # NOTE file can contain spaces
+    meta, file, errsplit := headtail(lsentry, "\t")
+    _, errscan := fmt.Sscanf(meta, "%o %s %s\n", &mode, &type_, &sha1)
+
+    if errsplit == nil && errscan == nil {
+        // parsed ok
+        return mode, type_, sha1, file, nil
+    }
+
+    return 0, "", Sha1{}, "", &InvalidLstreeEntry{lsentry}
+}
+
+// InvalidLstreeEntry is the error parse_lstree_entry() returns for malformed input.
+type InvalidLstreeEntry struct {
+    lsentry string
+}
+
+// Error reports the offending entry in quoted form.
+func (ile *InvalidLstreeEntry) Error() string {
+    return fmt.Sprintf("invalid ls-tree entry %q", ile.lsentry)
+}
--- a/misc.go
+++ b/misc.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file (in go.git repository).
+
+package main
+
+import (
+    "flag"
+    "fmt"
+    "strconv"
+)
+
+// countFlag is a flag that is both bool and int - for e.g. handling -v -v -v ...
+// inspired/copied by/from cmd.dist.count in go.git
+type countFlag int
+
+// String returns the counter as decimal number.
+func (c *countFlag) String() string {
+    return strconv.Itoa(int(*c))
+}
+
+// Set updates the counter from flag value:
+// "true" (plain -v) increments it, "false" resets it to 0,
+// and an integer sets it to that value.
+func (c *countFlag) Set(s string) error {
+    if s == "true" {
+        *c++
+        return nil
+    }
+    if s == "false" {
+        *c = 0
+        return nil
+    }
+
+    n, err := strconv.Atoi(s)
+    if err != nil {
+        return fmt.Errorf("invalid count %q", s)
+    }
+    *c = countFlag(n)
+    return nil
+}
+
+// IsBoolFlag tells package flag that the flag can be given without a value
+// (flag.boolFlag)
+func (c *countFlag) IsBoolFlag() bool {
+    return true
+}
+
+// flag.Value
+var _ flag.Value = (*countFlag)(nil)
--- a/set.go
+++ b/set.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Set "template" type
+// TODO -> go:generate + template
+package main
+
+// Sha1Set is Set<Sha1>
+type Sha1Set map[Sha1]struct{}
+
+// Add puts v into the set.
+func (s Sha1Set) Add(v Sha1) {
+    s[v] = struct{}{}
+}
+
+// Contains reports whether v is in the set.
+func (s Sha1Set) Contains(v Sha1) bool {
+    _, present := s[v]
+    return present
+}
+
+// Elements returns all elements of the set as slice (in no particular order).
+func (s Sha1Set) Elements() []Sha1 {
+    ev := make([]Sha1, 0, len(s))
+    for e := range s {
+        ev = append(ev, e)
+    }
+    return ev
+}
+
+// StrSet is Set<string>
+type StrSet map[string]struct{}
+
+// Add puts v into the set.
+func (s StrSet) Add(v string) {
+    s[v] = struct{}{}
+}
+
+// Contains reports whether v is in the set.
+func (s StrSet) Contains(v string) bool {
+    _, present := s[v]
+    return present
+}
+
+// Elements returns all elements of the set as slice (in no particular order).
+func (s StrSet) Elements() []string {
+    ev := make([]string, 0, len(s))
+    for e := range s {
+        ev = append(ev, e)
+    }
+    return ev
+}
--- a/sha1.go
+++ b/sha1.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Sha1 type to work with SHA1 oids
+package main
+
+import (
+    "bytes"
+    "encoding/hex"
+    "fmt"
+)
+
+// size of SHA1 value in raw form, in bytes
+const SHA1_RAWSIZE = 20
+
+// Sha1 is SHA1 value in raw form
+// NOTE zero value Sha1{} is the NULL sha1
+// NOTE Sha1 size is 20 bytes, while on amd64
+//      - string size = 16 bytes
+//      - slice  size = 24 bytes
+//      -> so it is reasonable to pass Sha1 by value, not by reference
+type Sha1 struct {
+    sha1 [SHA1_RAWSIZE]byte
+}
+
+// Sha1 must implement fmt.Stringer
+var _ fmt.Stringer = Sha1{}
+
+// String returns sha1 in hex form
+func (sha1 Sha1) String() string {
+    hexv := make([]byte, hex.EncodedLen(len(sha1.sha1)))
+    hex.Encode(hexv, sha1.sha1[:])
+    return string(hexv)
+}
+
+func Sha1Parse(sha1str string) (Sha1, error) {
+    sha1 := Sha1{}
+    if hex.DecodedLen(len(sha1str)) != SHA1_RAWSIZE {
+        return Sha1{}, fmt.Errorf("sha1parse: %q invalid", sha1str)
+    }
+    _, err := hex.Decode(sha1.sha1[:], Bytes(sha1str))
+    if err != nil {
+        return Sha1{}, fmt.Errorf("sha1parse: %q invalid: %s", sha1str, err)
+    }
+
+    return sha1, nil
+}
+
+// Sha1 must implement fmt.Scanner (via pointer)
+var _ fmt.Scanner = (*Sha1)(nil)
+
+// Scan reads next token from s and parses it as sha1 in hex form.
+// Only 's' and 'v' verbs are accepted.
+// NOTE *sha1 is overwritten with what Sha1Parse returned even if parsing failed.
+func (sha1 *Sha1) Scan(s fmt.ScanState, ch rune) error {
+    if ch != 's' && ch != 'v' {
+        return fmt.Errorf("Sha1.Scan: invalid verb %q", ch)
+    }
+
+    // skip leading space and take the run of non-space characters after it
+    tok, err := s.Token(true, nil)
+    if err != nil {
+        return err
+    }
+
+    parsed, err := Sha1Parse(String(tok))
+    *sha1 = parsed
+    return err
+}
+
+// IsNull reports whether sha1 is the NULL sha1, i.e. equals zero value Sha1{}.
+//
+// The receiver is taken by value, the same way as in String, so that IsNull
+// can be called also on non-addressable values (e.g. directly on a function
+// result or on a map element). Calls made through *Sha1 keep working.
+func (sha1 Sha1) IsNull() bool {
+    return sha1 == Sha1{}
+}
+
+// BySha1 is []Sha1 with Len/Swap/Less methods, for sorting by Sha1.
+// Elements are ordered by byte-wise comparison of their raw values.
+type BySha1 []Sha1
+
+func (p BySha1) Len() int {
+    return len(p)
+}
+
+func (p BySha1) Swap(i, j int) {
+    p[j], p[i] = p[i], p[j]
+}
+
+func (p BySha1) Less(i, j int) bool {
+    a, b := &p[i], &p[j]
+    return bytes.Compare(a.sha1[:], b.sha1[:]) < 0
+}
--- a/testdata/1/dir 2 + β/empty repo.git/HEAD
+++ b/testdata/1/dir 2 + β/empty repo.git/HEAD
+ref: refs/heads/master
--- a/testdata/1/dir 2 + β/empty repo.git/config
+++ b/testdata/1/dir 2 + β/empty repo.git/config
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = true
--- a/testdata/1/dir 2 + β/empty repo.git/description
+++ b/testdata/1/dir 2 + β/empty repo.git/description
+Unnamed repository; edit this file 'description' to name the repository.
--- a/testdata/1/dir 2 + β/empty repo.git/info/exclude
+++ b/testdata/1/dir 2 + β/empty repo.git/info/exclude
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
--- a/testdata/1/dir 2 + β/empty repo.git/objects/.keep
+++ b/testdata/1/dir 2 + β/empty repo.git/objects/.keep
--- a/testdata/1/dir 2 + β/empty repo.git/refs/.keep
+++ b/testdata/1/dir 2 + β/empty repo.git/refs/.keep
--- a/testdata/1/dir 2 + β/file 2
+++ b/testdata/1/dir 2 + β/file 2
+Hello World 2
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/HEAD
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/HEAD
+ref: refs/heads/master
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/config
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/config
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = true
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/description
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/description
+Unnamed repository; edit this file 'description' to name the repository.
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/info/exclude
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/info/exclude
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/c8/a5d21a94426b0df61c6876a73cdee587fe8a12
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/c8/a5d21a94426b0df61c6876a73cdee587fe8a12
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/cb/b6d3f205749888f77fb1a88fbac3b8a0b8000f
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/cb/b6d3f205749888f77fb1a88fbac3b8a0b8000f
+x;
+B1@QbzA&" |l/ncҫb" shKHZĐ\d-:M({='íq6?pE_*縀49F8F{ݏr|N[e8
\ No newline at end of file
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/e4/3cf7bdda456b06e15008b047cd1fd384a255b0
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/objects/e4/3cf7bdda456b06e15008b047cd1fd384a255b0
--- a/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/refs/heads/master
+++ b/testdata/1/dir 2 + β/repo with+fragile name %αβγ.git/refs/heads/master
+cbb6d3f205749888f77fb1a88fbac3b8a0b8000f
--- a/testdata/1/dir/hello.git/HEAD
+++ b/testdata/1/dir/hello.git/HEAD
+ref: refs/heads/master
--- a/testdata/1/dir/hello.git/config
+++ b/testdata/1/dir/hello.git/config
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = true
--- a/testdata/1/dir/hello.git/description
+++ b/testdata/1/dir/hello.git/description
+Unnamed repository; edit this file 'description' to name the repository.
--- a/testdata/1/dir/hello.git/info/exclude
+++ b/testdata/1/dir/hello.git/info/exclude
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
--- a/testdata/1/dir/hello.git/objects/04/3380ba73301364eac169038e1a461c2550854b
+++ b/testdata/1/dir/hello.git/objects/04/3380ba73301364eac169038e1a461c2550854b
+xA
+0E]ZL24 ޥ'h)6(,<=䜚`2K#Zyvcqz",F+Mzb2ґa0,`Rx?!Fc=JTӶ{m-oy^Sם?)|֎В<*?lw;_+a_C
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/07/64b6ac0f3bf0be613ae0fa3e3f8e86a42a3320
+++ b/testdata/1/dir/hello.git/objects/07/64b6ac0f3bf0be613ae0fa3e3f8e86a42a3320
+xA
+0@Q9E$3I"]zdfۅp5&	mD9"`&&$@=cD@eNcfRk<[QZ,.ZYvێj=HD
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/0a/03dda131ff339d923bca1912b91bd4bf07bb50
+++ b/testdata/1/dir/hello.git/objects/0a/03dda131ff339d923bca1912b91bd4bf07bb50
--- a/testdata/1/dir/hello.git/objects/11/e67095628aa17b03436850e690faea3006c25d
+++ b/testdata/1/dir/hello.git/objects/11/e67095628aa17b03436850e690faea3006c25d
+x5
+0ayKeҀ{>%6m$ѷ7P\ſN{ >zrh@IoDMʑtFEd}fb\H.
-Ք3WKyyI^7pLPK֨I9G.]n-
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/1f/58fa8cd67704ae3a8650ed0034839c142102c8
+++ b/testdata/1/dir/hello.git/objects/1f/58fa8cd67704ae3a8650ed0034839c142102c8
--- a/testdata/1/dir/hello.git/objects/35/4caa307c647cadd3a9c3bffaaa1a72c1ea1dac
+++ b/testdata/1/dir/hello.git/objects/35/4caa307c647cadd3a9c3bffaaa1a72c1ea1dac
+xA
+0@Q9Ed&&)w	CVBo?m
C"z
+G3-SLFA@ 
+MG'2vaS8&2'^U}5/YsG8V.8NP{m= N}
^DX
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/43/0b21fcf673a749042883edbecc7962a0735b60
+++ b/testdata/1/dir/hello.git/objects/43/0b21fcf673a749042883edbecc7962a0735b60
--- a/testdata/1/dir/hello.git/objects/4d/ba4af84242f614d0c128d0fea3c17b2704deab
+++ b/testdata/1/dir/hello.git/objects/4d/ba4af84242f614d0c128d0fea3c17b2704deab
--- a/testdata/1/dir/hello.git/objects/58/1caa0fe56cf01dc028cc0b089d364993e046b6
+++ b/testdata/1/dir/hello.git/objects/58/1caa0fe56cf01dc028cc0b089d364993e046b6
+x+)JMU06g040031QH+(a/`E۪{E	2<,
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/61/882eb85774ed4401681d800bb9c638031375e2
+++ b/testdata/1/dir/hello.git/objects/61/882eb85774ed4401681d800bb9c638031375e2
--- a/testdata/1/dir/hello.git/objects/64/7e137fd3b31939b36889eba854a298ef97b6ff
+++ b/testdata/1/dir/hello.git/objects/64/7e137fd3b31939b36889eba854a298ef97b6ff
--- a/testdata/1/dir/hello.git/objects/71/24713e403925bc772cd252b0dec099f3ced9c5
+++ b/testdata/1/dir/hello.git/objects/71/24713e403925bc772cd252b0dec099f3ced9c5
+x-K!]sޛ14.=2
abK6O#nJC4Rqb@#
+tF8/e40Y0jX	Ol݇C_58KBeϹ㍞4]aZ	+-_~o7,
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/7a/3343f584218e973165d943d7c0af47a52ca477
+++ b/testdata/1/dir/hello.git/objects/7a/3343f584218e973165d943d7c0af47a52ca477
--- a/testdata/1/dir/hello.git/objects/89/23fcbcf7c2d6aee702fc3af36179674ebc6141
+++ b/testdata/1/dir/hello.git/objects/89/23fcbcf7c2d6aee702fc3af36179674ebc6141
--- a/testdata/1/dir/hello.git/objects/8c/36e6323e3cd071201aa56c8689d8d5d76b3818
+++ b/testdata/1/dir/hello.git/objects/8c/36e6323e3cd071201aa56c8689d8d5d76b3818
--- a/testdata/1/dir/hello.git/objects/98/0a0d5f19a64b4b30a87d4206aade58726b60e3
+++ b/testdata/1/dir/hello.git/objects/98/0a0d5f19a64b4b30a87d4206aade58726b60e3
--- a/testdata/1/dir/hello.git/objects/aa/2c51cfead167dd56080d3c9934f0569f1047f5
+++ b/testdata/1/dir/hello.git/objects/aa/2c51cfead167dd56080d3c9934f0569f1047f5
--- a/testdata/1/dir/hello.git/objects/b7/ee9ad7102de1f84cfdbc1e70d3bb844d241ddc
+++ b/testdata/1/dir/hello.git/objects/b7/ee9ad7102de1f84cfdbc1e70d3bb844d241ddc
+xͱ
+0a<\BG\14 D3񳖒؞@,3Wqa8:AOݞZjET%_Z`=Qz{8C4{ݏM*9+<h~,97
\ No newline at end of file
--- a/testdata/1/dir/hello.git/objects/ba/899e5639273a6fa4d50d684af8db1ae070351e
+++ b/testdata/1/dir/hello.git/objects/ba/899e5639273a6fa4d50d684af8db1ae070351e
--- a/testdata/1/dir/hello.git/objects/ba/9c13bac5b12f73b2d8f8d2b4be8d23a808ccf8
+++ b/testdata/1/dir/hello.git/objects/ba/9c13bac5b12f73b2d8f8d2b4be8d23a808ccf8
--- a/testdata/1/dir/hello.git/objects/bd/7195c104b27b1754ce3d60e9e700946b69c458
+++ b/testdata/1/dir/hello.git/objects/bd/7195c104b27b1754ce3d60e9e700946b69c458
--- a/testdata/1/dir/hello.git/objects/d4/99d60deffbd1f156e46e280b8dd1c4035e2db3
+++ b/testdata/1/dir/hello.git/objects/d4/99d60deffbd1f156e46e280b8dd1c4035e2db3
--- a/testdata/1/dir/hello.git/objects/e1/4b1cb9ad4e5120be959593996b777573f7432c
+++ b/testdata/1/dir/hello.git/objects/e1/4b1cb9ad4e5120be959593996b777573f7432c
--- a/testdata/1/dir/hello.git/objects/f7/35011c9fcece41219729a33f7876cd8791f659
+++ b/testdata/1/dir/hello.git/objects/f7/35011c9fcece41219729a33f7876cd8791f659
--- a/testdata/1/dir/hello.git/objects/fa/d8102e8ab929bca0d6d4bc880d5849e101044a
+++ b/testdata/1/dir/hello.git/objects/fa/d8102e8ab929bca0d6d4bc880d5849e101044a
--- a/testdata/1/dir/hello.git/objects/fe/eed96ca75fcf8dcf183008f61dbf72e91ab4de
+++ b/testdata/1/dir/hello.git/objects/fe/eed96ca75fcf8dcf183008f61dbf72e91ab4de
+xKj1Pu`V}>#QdG ,xEZ&
+:eXmb0Lgl萂9mYB>/٥D޳9귭Wie]Zeoo>#\y0qcvZю]mi,YC/=F
\ No newline at end of file
--- a/testdata/1/dir/hello.git/packed-refs
+++ b/testdata/1/dir/hello.git/packed-refs
+# pack-refs with: peeled fully-peeled 
+647e137fd3b31939b36889eba854a298ef97b6ff refs/heads/branch2
+feeed96ca75fcf8dcf183008f61dbf72e91ab4de refs/heads/master
+f735011c9fcece41219729a33f7876cd8791f659 refs/tags/tag-to-commit
+^354caa307c647cadd3a9c3bffaaa1a72c1ea1dac
+7124713e403925bc772cd252b0dec099f3ced9c5 refs/tags/tag-to-tag
+^354caa307c647cadd3a9c3bffaaa1a72c1ea1dac
+ba899e5639273a6fa4d50d684af8db1ae070351e refs/tags/tag-to-tree
+^e14b1cb9ad4e5120be959593996b777573f7432c
+7a3343f584218e973165d943d7c0af47a52ca477 refs/test/ref-to-blob
--- a/testdata/1/dir/hello.git/refs/tags/tag-to-blob
+++ b/testdata/1/dir/hello.git/refs/tags/tag-to-blob
+11e67095628aa17b03436850e690faea3006c25d
--- a/testdata/1/dir/hello.git/refs/test/ref-to-tree
+++ b/testdata/1/dir/hello.git/refs/test/ref-to-tree
+61882eb85774ed4401681d800bb9c638031375e2
--- a/testdata/1/dir/world.txt
+++ b/testdata/1/dir/world.txt
+Zzz
--- a/testdata/1/file
+++ b/testdata/1/file
+Hello World!
--- a/testdata/1/file with space + α
+++ b/testdata/1/file with space + α
+Hello World! zzz
--- a/testdata/1/fileexec
+++ b/testdata/1/fileexec
+#!/bin/sh
+
+exec echo "Hello"
--- a/testdata/1/symlink.dir
+++ b/testdata/1/symlink.dir
+dir
\ No newline at end of file
--- a/testdata/1/symlink.file
+++ b/testdata/1/symlink.file
+file with space + α
\ No newline at end of file
--- a/testdata/1/symlink.missing
+++ b/testdata/1/symlink.missing
+bbb
\ No newline at end of file
--- a/testdata/2/corrupt.git/HEAD
+++ b/testdata/2/corrupt.git/HEAD
+ref: refs/heads/master
--- a/testdata/2/corrupt.git/config
+++ b/testdata/2/corrupt.git/config
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = true
--- a/testdata/2/corrupt.git/description
+++ b/testdata/2/corrupt.git/description
+Unnamed repository; edit this file 'description' to name the repository.
--- a/testdata/2/corrupt.git/info/exclude
+++ b/testdata/2/corrupt.git/info/exclude
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
--- a/testdata/2/corrupt.git/objects/28/c06ba333326af5266297b8aa21051f294f298d
+++ b/testdata/2/corrupt.git/objects/28/c06ba333326af5266297b8aa21051f294f298d
+xK
+0a9<:mAĝ M&thb ZtГ;fA=%2	HuGqD0`͋wi3<
+签Ik'9)rHj8ET{ݏVk$#}W?G;|
\ No newline at end of file
--- a/testdata/2/corrupt.git/objects/2b/01966f6282c06734fad0cd7000c53c3bf0349b
+++ b/testdata/2/corrupt.git/objects/2b/01966f6282c06734fad0cd7000c53c3bf0349b
--- a/testdata/2/corrupt.git/objects/c6/c31ba413a4588cac7f77919bfcbe4adbf1d3b4
+++ b/testdata/2/corrupt.git/objects/c6/c31ba413a4588cac7f77919bfcbe4adbf1d3b4
--- a/testdata/2/corrupt.git/objects/c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4.orig
+++ b/testdata/2/corrupt.git/objects/c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4.orig
--- a/testdata/2/corrupt.git/objects/corruptit.py
+++ b/testdata/2/corrupt.git/objects/corruptit.py
+#!/usr/bin/env python
+# make corruption to c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4 loose object
+import os, zlib
+
+def readfile(path):
+    with open(path, 'r') as f:
+        return f.read()
+
+def writefile(path, data):
+    try:
+        os.unlink(path)
+    except OSError:
+        pass
+    with open(path, 'w') as f:
+        f.write(data)
+
+
+z = readfile("c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4.orig")
+print `z`
+
+d = zlib.decompress(z)
+print `d`
+
+D = d.replace('good', 'BAAD')
+print `D`
+
+Z = zlib.compress(D)
+print `Z`
+
+writefile("c6/c31ba413a4588cac7f77919bfcbe4adbf1d3b4", Z)
--- a/testdata/2/corrupt.git/refs/heads/master
+++ b/testdata/2/corrupt.git/refs/heads/master
+28c06ba333326af5266297b8aa21051f294f298d
--- a/util.go
+++ b/util.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+// Git-backup | Miscellaneous utilities
+package main
+
+import (
+    "encoding/hex"
+    "fmt"
+    "os"
+    "reflect"
+    "strings"
+    "syscall"
+    "unicode"
+    "unicode/utf8"
+    "unsafe"
+)
+
+// Bytes converts string -> []byte without copying: returned slice shares memory with s, so callers should not modify it
+func Bytes(s string) []byte {
+    var b []byte
+    bp := (*reflect.SliceHeader)(unsafe.Pointer(&b)) // view b's own slice header to fill it in by hand
+    bp.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data // point b at the data of s
+    bp.Cap = len(s) // cap == len: there is no spare capacity after the string data
+    bp.Len = len(s)
+    return b
+}
+
+// String converts []byte -> string without copying: returned string shares memory with b, so later changes to b show through it
+func String(b []byte) string {
+    var s string
+    sp := (*reflect.StringHeader)(unsafe.Pointer(&s)) // view s's own string header to fill it in by hand
+    sp.Data = (*reflect.SliceHeader)(unsafe.Pointer(&b)).Data // point s at the data of b
+    sp.Len = len(b)
+    return s
+}
+
+// split2 splits s by sep and expects exactly 2 parts.
+// If the number of parts is different, empty strings and error are returned.
+func split2(s, sep string) (s1, s2 string, err error) {
+    fields := strings.Split(s, sep)
+    if n := len(fields); n != 2 {
+        return "", "", fmt.Errorf("split2: %q has %v parts (expected 2, sep: %q)", s, n, sep)
+    }
+    s1, s2 = fields[0], fields[1]
+    return s1, s2, nil
+}
+
+// headtail splits s at the first sep: (head+sep+tail) -> head, tail
+// If there is no sep in s, empty strings and error are returned.
+func headtail(s, sep string) (head, tail string, err error) {
+    ht := strings.SplitN(s, sep, 2)
+    if len(ht) == 2 {
+        return ht[0], ht[1], nil
+    }
+    return "", "", fmt.Errorf("headtail: %q has no %q", s, sep)
+}
+
+// strip_prefix("/a/b", "/a/b/c/d/e") -> "c/d/e" (without leading /)
+// path must start with prefix - if it does not, strip_prefix panics
+func strip_prefix(prefix, path string) string {
+    if !strings.HasPrefix(path, prefix) {
+        panic(fmt.Errorf("strip_prefix: %q has no prefix %q", path, prefix))
+    }
+    // drop the prefix and then all leading / of what remains
+    return strings.TrimLeft(path[len(prefix):], "/")
+}
+
+// reprefix("/a", "/b", "/a/str") -> "/b/str"
+// path must start with prefix_from (see strip_prefix)
+func reprefix(prefix_from, prefix_to, path string) string {
+    return prefix_to + "/" + strip_prefix(prefix_from, path)
+}
+
+// writefile writes data to file at path, creating the file if it does not
+// exist and truncating it otherwise.
+//
+// It is like ioutil.WriteFile() but takes native mode/perm: perm is passed to
+// the open system call as-is instead of being an os.FileMode.
+//
+// If both write and close fail, the error from write is returned.
+func writefile(path string, data []byte, perm uint32) error {
+    fd, err := syscall.Open(path, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_TRUNC, perm)
+    if err != nil {
+        // keyed fields, so that the literal stays correct and vet-clean
+        return &os.PathError{Op: "open", Path: path, Err: err}
+    }
+    f := os.NewFile(uintptr(fd), path)
+    _, err = f.Write(data)
+    // close unconditionally, so that fd is not leaked when write fails
+    if errClose := f.Close(); err == nil {
+        err = errClose
+    }
+    return err
+}
+
+// path_refescape escapes path so that git is happy to use it as ref
+// https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34
+//
+// The path is processed component by component (components are separated by "/"):
+//
+//  - runes for which shouldEscape is true are %-encoded byte by byte;
+//  - a run of N consecutive dots becomes N-1 times "%2E" followed by ".",
+//    this way single "." is kept intact and ".." never appears in the output;
+//  - leading "." of a component is encoded as "%2E";
+//  - the "." of trailing ".lock" of a component is encoded as "%2E".
+//
+// Empty trailing components (trailing /) are dropped from the result.
+//
+// Every component is accumulated in a byte buffer, not via repeated string
+// concatenation, so the work is linear in path length.
+func path_refescape(path string) string {
+    dot := escape(".") // loop-invariant
+
+    outv := []string{}
+    for _, component := range strings.Split(path, "/") {
+        out := make([]byte, 0, len(component))
+        dots := 0 // number of seen consecutive dots
+        for len(component) > 0 {
+            r, size := utf8.DecodeRuneInString(component)
+            rbytes := component[:size]
+            component = component[size:]
+
+            // no ".." anywhere - we replace dots run to %46%46... with trailing "."
+            // this way for single "." case we'll have it intact and avoid .. anywhere
+            // also this way: trailing .git is always encoded as ".git"
+            if r == '.' {
+                dots++
+                continue
+            }
+            if dots != 0 {
+                out = append(out, strings.Repeat(dot, dots-1)...)
+                out = append(out, '.')
+                dots = 0
+            }
+
+            if shouldEscape(r) {
+                rbytes = escape(rbytes)
+            }
+            out = append(out, rbytes...)
+        }
+
+        // handle trailing dots
+        if dots != 0 {
+            out = append(out, strings.Repeat(dot, dots-1)...)
+            out = append(out, '.')
+        }
+
+        esc := string(out)
+        if len(esc) > 0 {
+            // ^. not allowed
+            if esc[0] == '.' {
+                esc = dot + esc[1:]
+            }
+            // .lock$ not allowed
+            if strings.HasSuffix(esc, ".lock") {
+                esc = esc[:len(esc)-5] + dot + "lock"
+            }
+        }
+        outv = append(outv, esc)
+    }
+
+    // strip trailing /
+    for len(outv) > 0 && len(outv[len(outv)-1]) == 0 {
+        outv = outv[:len(outv)-1]
+    }
+    return strings.Join(outv, "/")
+}
+
+// shouldEscape reports whether rune r must be %-encoded by path_refescape:
+// space and control characters, the special characters : ? [ \ ^ ~ * @ %
+// and utf8.RuneError.
+func shouldEscape(r rune) bool {
+    switch {
+    case unicode.IsSpace(r), unicode.IsControl(r):
+        return true
+    // NOTE RuneError is for always escaping non-valid UTF-8
+    case r == utf8.RuneError:
+        return true
+    }
+    return strings.ContainsRune(`:?[\^~*@%`, r)
+}
+
+func escape(s string) string {
+    out := ""
+    for i := 0; i < len(s); i++ {
+        out += fmt.Sprintf("%%%02X", s[i])
+    }
+    return out
+}
+
+// path_refunescape unescapes path encoded by path_refescape()
+// decoding is permissive - any byte can be %-encoded, not only special cases
+// on "%" not followed by 2 hex digits "" and EscapeError are returned
+// XXX very suboptimal
+func path_refunescape(s string) (string, error) {
+    out := make([]byte, 0, len(s))
+    for i := 0; i < len(s); {
+        c := s[i]
+        if c != '%' {
+            out = append(out, c)
+            i++
+            continue
+        }
+
+        // %XX
+        if len(s)-i < 3 {
+            return "", EscapeError(s)
+        }
+        b, err := hex.DecodeString(s[i+1 : i+3])
+        if err != nil {
+            return "", EscapeError(s)
+        }
+        out = append(out, b[0])
+        i += 3
+    }
+    return String(out), nil
+}
+
+// EscapeError is the error reported for a string with invalid %-escape format.
+// Its value is the offending string.
+type EscapeError string
+
+func (e EscapeError) Error() string {
+    bad := string(e)
+    return fmt.Sprintf("%q", bad) + ": invalid escape format"
+}
--- a/util_test.go
+++ b/util_test.go
+// Copyright (C) 2015-2016  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+
+package main
+
+import (
+    "reflect"
+    "strings"
+    "testing"
+)
+
+// check that String() and Bytes() create correct objects which alias original object memory
+func TestStringBytes(t *testing.T) {
+    s := "Hello"
+    b := []byte(s) // regular conversion - b is a copy of s
+
+    s1 := String(b) // s1 aliases b
+    b1 := Bytes(s1) // b1 aliases s1, and thus b
+    if s1 != s {
+        t.Error("string -> []byte -> String != Identity")
+    }
+    if !reflect.DeepEqual(b1, b) {
+        t.Error("[]byte -> String -> Bytes != Identity")
+    }
+
+    // change made through b must be visible via s1 and b1, but not via s
+    b[0] = 'I'
+    if s != "Hello" {
+        t.Error("string -> []byte not copied")
+    }
+    if s1 != "Iello" {
+        t.Error("[]byte -> String not aliased")
+    }
+    if !reflect.DeepEqual(b1, b) {
+        t.Error("string -> Bytes  not aliased")
+    }
+}
+
+// check that split2 succeeds only for inputs which have exactly 2 " "-separated parts
+func TestSplit2(t *testing.T) {
+    type testEntry struct {
+        input  string
+        s1, s2 string
+        ok     bool
+    }
+    testv := []testEntry{
+        {"", "", "", false},
+        {" ", "", "", true},
+        {"hello", "", "", false},
+        {"hello world", "hello", "world", true},
+        {"hello world 1", "", "", false},
+    }
+
+    for _, tc := range testv {
+        s1, s2, err := split2(tc.input, " ")
+        ok := (err == nil)
+        if s1 == tc.s1 && s2 == tc.s2 && ok == tc.ok {
+            continue
+        }
+        t.Errorf("split2(%q) -> %q %q %v  ; want %q %q %v", tc.input, s1, s2, ok, tc.s1, tc.s2, tc.ok)
+    }
+}
+
+// check that headtail splits input at the first " " and fails when there is none
+func TestHeadtail(t *testing.T) {
+    type testEntry struct {
+        input      string
+        head, tail string
+        ok         bool
+    }
+    testv := []testEntry{
+        {"", "", "", false},
+        {" ", "", "", true},
+        {"  ", "", " ", true},
+        {"hello world", "hello", "world", true},
+        {"hello world 1", "hello", "world 1", true},
+        {"hello  world 2", "hello", " world 2", true},
+    }
+
+    for _, tc := range testv {
+        head, tail, err := headtail(tc.input, " ")
+        ok := (err == nil)
+        if head == tc.head && tail == tc.tail && ok == tc.ok {
+            continue
+        }
+        t.Errorf("headtail(%q) -> %q %q %v  ; want %q %q %v", tc.input, head, tail, ok, tc.head, tc.tail, tc.ok)
+    }
+}
+
+// check that path_refescape produces expected canonical escaping, and that
+// path_refunescape decodes both canonical and non-canonical escapes back to
+// original path (modulo trailing /, which escaping drops).
+func TestPathEscapeUnescape(t *testing.T) {
+    type TestEntry struct { path string; escapedv []string }
+    te := func(path string, escaped ...string) TestEntry {
+        return TestEntry{path, escaped}
+    }
+    var tests = []TestEntry{
+        //  path           escaped        non-canonical escapes
+        te("hello/world", "hello/world", "%68%65%6c%6c%6f%2f%77%6f%72%6c%64"),
+        te("hello/мир",   "hello/мир"),
+        te("hello/ мир",  "hello/%20мир"),
+        te("hel%lo/мир",  "hel%25lo/мир"),
+        te(".hello/.world", "%2Ehello/%2Eworld"),
+        te("..hello/world.loc", "%2E.hello/world.loc"),
+        te("..hello/world.lock", "%2E.hello/world%2Elock"),
+        // leading /
+        te("/hello/world", "/hello/world"),
+        te("//hello///world", "//hello///world"),
+        // trailing /
+        te("/hello/world/", "/hello/world"),
+        te("/hello/world//", "/hello/world"),
+
+        // trailing ...
+        te("/hello/world.", "/hello/world."),
+        te("/hello/world..", "/hello/world%2E."),
+        te("/hello/world...", "/hello/world%2E%2E."),
+        te("/hello/world...git", "/hello/world%2E%2E.git"),
+
+        // .. anywhere
+        te("/hello/./world",    "/hello/%2E/world"),
+        te("/hello/.a/world",   "/hello/%2Ea/world"),
+        te("/hello/a./world",   "/hello/a./world"),
+        te("/hello/../world",   "/hello/%2E./world"),
+        te("/hello/a..b/world", "/hello/a%2E.b/world"),
+        te("/hello/a.c.b/world", "/hello/a.c.b/world"),
+        te("/hello/a.c..b/world", "/hello/a.c%2E.b/world"),
+
+        // special & control characters
+        te("/hel lo/wor\tld/a:?[\\^~*@%b/\001\004\n\xc2\xa0", "/hel%20lo/wor%09ld/a%3A%3F%5B%5C%5E%7E%2A%40%25b/%01%04%0A%C2%A0"),
+
+        // utf8 error
+        te("a\xc5z",    "a%C5z"),
+    }
+
+    for _, tt := range tests {
+        // the first escaped entry is the canonical one
+        escaped := path_refescape(tt.path)
+        if escaped != tt.escapedv[0] {
+            t.Errorf("path_refescape(%q) -> %q  ; want %q", tt.path, escaped, tt.escapedv[0])
+        }
+        // also check the decoding
+        // escaping drops trailing /, so that is what decoding is expected to give back
+        pathok := strings.TrimRight(tt.path, "/")
+        for _, escaped := range tt.escapedv {
+            unescaped, err := path_refunescape(escaped)
+            if unescaped != pathok || err != nil {
+                // report what was actually compared against - pathok, not tt.path
+                t.Errorf("path_refunescape(%q) -> %q %v  ; want %q nil", escaped, unescaped, err, pathok)
+            }
+        }
+    }
+}
+
+func TestPathUnescapeErr(t *testing.T) {
+    var tests = []struct{ escaped string }{
+        {"%"},
+        {"%2"},
+        {"%2q"},
+        {"hell%2q/world"},
+    }
+
+    for _, tt := range tests {
+        unescaped, err := path_refunescape(tt.escaped)
+        if err == nil || unescaped != "" {
+            t.Errorf("path_refunescape(%q) -> %q %v  ; want \"\" err", tt.escaped, unescaped, err)
+        }
+    }
+}