Commit 28986e0e authored by Kirill Smelkov's avatar Kirill Smelkov

Rewrite in Go

This is a more-or-less 1-to-1 port of git-backup to Go. There are a few things
we handle a bit differently:

- there is a separate type for Sha1
- conversion of repo paths to git references is now more robust with respect
  to avoiding constructs that git does not allow in refnames, like ".." or ".lock"

  https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34

The rewrite happened because we need to optimize restore, and for parts such
as parallelization it should be convenient to use goroutines and channels.

I'm not very comfortable with how error handling is done: contrary to
what the canonical Go way seems to be, in a lot of places exceptions still
look to me like a better idea than plain error codes, though in many other
places plain error codes are better and make more sense. There will probably
be fewer exceptions over time, once the code becomes a collaborating set of
goroutines communicating via channels.

Still a lot of python habits on my side.

And as a bonus we now have end-to-end pull/restore tests...
parent a6cfe210
git-backup
......@@ -50,12 +50,12 @@ Backup workflow is:
$ git pull ...
Please see `git-backup`__ source with technical overview on how it works.
Please see `git-backup.go`__ source with technical overview on how it works.
We also provide convenience program to pull/restore backup data for a GitLab
instance into/from git-backup managed repository. See `contrib/gitlab-backup`__
for details.
__ git-backup
__ git-backup.go
__ contrib/gitlab-backup
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Exception-style errors
package main
import (
"fmt"
"runtime"
"strings"
)
// Error is the error type which is raised by raise(arg).
//
// Errors form a singly linked chain: arg is the payload of this element and
// link points to the next element; a nil link terminates the chain.
// NOTE the struct is constructed with positional literals (e.g. &Error{v, nil}),
// so the field order must not change.
type Error struct {
arg interface{} // payload of this chain element
link *Error // chain of linked Error(s) - see e.g. errcontext()
}
// Error renders the whole error chain as one string.
//
// Every chain element contributes one part: a Frame argument is shown as its
// function name with the _errorpkgdot prefix trimmed, anything else is
// formatted with fmt.Sprint. Parts are joined with ": " in chain order.
// A nil receiver yields "".
func (e *Error) Error() string {
	parts := []string{}
	for cur := e; cur != nil; cur = cur.link {
		// TODO(go1.7) -> runtime.Frame (see xtraceback())
		switch arg := cur.arg.(type) {
		case Frame:
			// XXX -> better prettyfunc
			parts = append(parts, strings.TrimPrefix(arg.Name(), _errorpkgdot))
		default:
			parts = append(parts, fmt.Sprint(arg))
		}
	}
	return strings.Join(parts, ": ")
}
// aserror converts an arbitrary value into *Error.
//
// A value that already is *Error is returned unchanged; any other value
// becomes the argument of a freshly allocated, unlinked Error.
func aserror(v interface{}) *Error {
	switch x := v.(type) {
	case *Error:
		return x
	default:
		return &Error{arg: v, link: nil}
	}
}
// raise reports arg to the upper level: it panics with arg converted to
// *Error via aserror().
func raise(arg interface{}) {
	err := aserror(arg)
	panic(err)
}
// raisef raises a string built from format and a, fmt.Sprintf-style.
func raisef(format string, a ...interface{}) {
	msg := fmt.Sprintf(format, a...)
	raise(msg)
}
// raiseif raises err when it is non-nil and does nothing otherwise.
//
// NOTE err can be != nil even if it holds a nil typed pointer:
//	var obj *T;
//	err = obj
//	err != nil is true
// such a value is raised as well.
func raiseif(err error) {
	//if err != nil && !reflect.ValueOf(err).IsNil() {
	if err == nil {
		return
	}
	raise(err)
}
// _errcatch classifies a value obtained from recover().
//
//	- nil (no panic in progress)  -> returns nil
//	- non-nil *Error              -> returns it
//	- anything else               -> repanics with the very same value
func _errcatch(r interface{}) *Error {
	if r == nil {
		return nil
	}
	if e, ok := r.(*Error); ok && e != nil {
		return e
	}
	// not our error - propagate it further
	panic(r)
}
// errcatch catches a raised error and hands it to f.
// f is not called when there is nothing to catch.
// must be called under defer
func errcatch(f func(e *Error)) {
	// NOTE recover() has to be called directly from here - the deferred function
	if e := _errcatch(recover()); e != nil {
		f(e)
	}
}
// erronunwind lets the caller be notified while an error is unwinding.
// f is called with the error being unwound; whatever f returns is reraised.
// see also: errcontext()
// must be called under defer
func erronunwind(f func(e *Error) *Error) {
	// errcatch(...) cannot be reused here:
	// recover() works only when called directly by the deferred function
	caught := _errcatch(recover())
	if caught != nil {
		panic(f(caught))
	}
}
// errcontext registers context to be added automatically on unwinding.
// f is called only if an error is being unwound; its result is put in front
// of the raised error as "prefix" context and the extended error is reraised.
// must be called under defer
func errcontext(f func() interface{}) {
	// NOTE recover() has to be called directly from here - the deferred function
	if e := _errcatch(recover()); e != nil {
		panic(erraddcontext(e, f()))
	}
}
// erraddcontext returns a new error whose head carries arg and which links
// to e, i.e. arg becomes "prefix" context of e. e itself is not modified.
func erraddcontext(e *Error, arg interface{}) *Error {
	return &Error{arg: arg, link: e}
}
// _myfuncname returns the name of the function nskip frames up the stack.
// nskip has the same meaning as in runtime.Callers().
// "" is returned if no function can be resolved for that frame.
func _myfuncname(nskip int) string {
	pcs := make([]uintptr, 1)
	runtime.Callers(nskip, pcs)
	if fn := runtime.FuncForPC(pcs[0]); fn != nil {
		return fn.Name()
	}
	return ""
}
// myfuncname returns the name of the currently running function, i.e. of the
// caller of myfuncname().
// name is fully qualified package/name.function(.x)
//
// The skip count 3 selects that caller under runtime.Callers() counting
// (0 - Callers itself, 1 - _myfuncname, 2 - myfuncname, 3 - its caller),
// so the _myfuncname call must stay directly in this function body.
func myfuncname() string {
return _myfuncname(3)
}
// mypkgname returns the package of the currently running function, i.e. of
// the caller of mypkgname().
// package is fully qualified package/name
//
// "" is returned if the calling function cannot be resolved.
// It panics if the resolved function name contains no package delimiter.
func mypkgname() string {
	// 3: 0 - runtime.Callers, 1 - _myfuncname, 2 - mypkgname, 3 - our caller
	myfunc := _myfuncname(3)
	if myfunc == "" {
		return ""
	}

	// myfunc is e.g. "lab.nexedi.com/kirr/git-backup.init".
	// NOTE go escapes dots as %2e only in the last element of the package
	// path; leading elements (e.g. a host name) keep their dots as is.
	// This way the package/function delimiter is the first dot located
	// after the last '/', not the first dot of the whole name.
	ibase := strings.LastIndexByte(myfunc, '/') + 1 // 0 if there is no '/'
	idot := strings.IndexByte(myfunc[ibase:], '.')
	if idot == -1 {
		panic(fmt.Errorf("funcname %q is not fully qualified", myfunc))
	}
	return myfunc[:ibase+idot]
}
// Frame is one entry of a traceback produced by xtraceback(): the function
// resolved for a program counter together with that program counter.
// NOTE it is constructed with a positional literal, so the field order must
// not change.
// TODO(go1.7) goes away in favour of runtime.Frame
type Frame struct {
*runtime.Func
pc uintptr
}
// xtraceback returns the current calling traceback as []Frame.
// nskip meaning: the same as in runtime.Callers()
// TODO(go1.7) []Frame -> []runtime.Frame
func xtraceback(nskip int) []Frame {
	// collect all callers: the buffer is doubled (2, 4, 8, ...) until
	// runtime.Callers leaves at least one slot unused.
	var pcs []uintptr
	for size := 2; ; size *= 2 {
		pcs = make([]uintptr, size)
		// +1 accounts for the frame of xtraceback itself, so that nskip
		// counts from xtraceback the way it counts from runtime.Callers
		if n := runtime.Callers(nskip+1, pcs); n < size {
			pcs = pcs[:n]
			break
		}
	}

	// pcs -> frames
	// TODO(go1.7) iterate runtime.CallersFrames(pcs) and collect runtime.Frame instead
	frames := make([]Frame, len(pcs))
	for i, pc := range pcs {
		frames[i] = Frame{runtime.FuncForPC(pc), pc}
	}
	return frames
}
// names derived once at startup from the package this file is compiled into
var (
	_errorpkgname string // package name under which error.go lives
	_errorpkgdot  string // errorpkg.      - prefix trimmed from function names by Error()
	_errorraise   string // errorpkg.raise - common name prefix of raise*() functions
)

// init fills in the package-derived names above.
func init() {
	pkg := mypkgname()
	_errorpkgname, _errorpkgdot, _errorraise = pkg, pkg+".", pkg+".raise"
}
// erraddcallingcontext adds calling context to an error.
//
// The current traceback is walked from the innermost frame outwards.
// Everything up to and including the first raise*() frame is ignored; after
// that every function name is put in front of e as context, until topfunc is
// reached (topfunc itself is not included; "" means no limit).
// Functions whose name ends with "_" are treated as intermediates and skipped.
// see also: erraddcontext()
//
// NOTE(review): only the first raise*() frame is dropped; when raise() is
// entered via raisef()/raiseif() the wrapper frame looks like it is added as
// context too - confirm whether that is intended.
func erraddcallingcontext(topfunc string, e *Error) *Error {
	raised := false
	for _, frame := range xtraceback(2) {
		name := frame.Name()

		// do not show anything after raise*()
		if !raised {
			raised = strings.HasPrefix(name, _errorraise)
			continue
		}

		// do not go beyond topfunc
		if topfunc != "" && name == topfunc {
			break
		}

		// skip intermediates
		if strings.HasSuffix(name, "_") { // XXX -> better skipfunc
			continue
		}

		e = &Error{frame, e}
	}
	return e
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
package main
import (
"strings"
"testing"
)
// do_raise1 raises 1.
// NOTE the function name is part of the calling context expected by
// TestErrAddCallingContext ("do_raise11: do_raise1: 1").
func do_raise1() {
raise(1)
}
func TestErrRaiseCatch(t *testing.T) {
defer errcatch(func(e *Error) {
if !(e.arg == 1 && e.link == nil) {
t.Fatalf("error caught but unexpected: %#v ; want {1, nil}", e)
}
})
do_raise1()
t.Fatal("error not caught")
}
// verifyErrChain fails the test unless the error chain starting at e carries
// exactly the args listed in argv, in that order.
func verifyErrChain(t *testing.T, e *Error, argv ...interface{}) {
	n := 0
	for cur := e; cur != nil; cur = cur.link {
		if n >= len(argv) {
			t.Fatal("too long error chain")
		}
		if want := argv[n]; cur.arg != want {
			t.Fatalf("error caught but unexpected %vth arg: %v ; want %v", n, cur.arg, want)
		}
		n++
	}

	if n < len(argv) {
		t.Fatal("too small error chain")
	}
}
func do_onunwind1(t *testing.T) {
defer erronunwind(func(e *Error) *Error {
t.Fatal("on unwind called without raise")
return nil
})
}
// do_onunwind2 raises 1 from under an unwind hook which puts 2 in front of
// the error being unwound.
func do_onunwind2() {
	defer erronunwind(func(inner *Error) *Error {
		return &Error{arg: 2, link: inner}
	})

	do_raise1()
}
func TestErrOnUnwind(t *testing.T) {
defer errcatch(func(e *Error) {
verifyErrChain(t, e, 2, 1)
})
do_onunwind1(t)
do_onunwind2()
t.Fatal("error not caught")
}
// do_context1 registers error context and returns without raising:
// the context function must not be invoked.
func do_context1(t *testing.T) {
	ctx := func() interface{} {
		t.Fatal("on context called without raise")
		return nil
	}
	defer errcontext(ctx)
}
// do_context2 raises 1 from under registered error context 3.
func do_context2() {
	ctx := func() interface{} {
		return 3
	}
	defer errcontext(ctx)

	do_raise1()
}
func TestErrContext(t *testing.T) {
defer errcatch(func(e *Error) {
verifyErrChain(t, e, 3, 1)
})
do_context1(t)
do_context2()
t.Fatal("error not caught")
}
// TestMyFuncName checks that myfuncname() reports the function it is called from.
func TestMyFuncName(t *testing.T) {
	// go test changes the full package name (putting the filesystem path of
	// the tree into it), thus only the suffix is checked.
	const wantsuffix = ".TestMyFuncName"

	if name := myfuncname(); !strings.HasSuffix(name, wantsuffix) {
		t.Errorf("myfuncname() -> %v ; want *%v", name, wantsuffix)
	}
}
// do_raise11 raises 1 through one more call level (via do_raise1).
// NOTE the function name is part of the calling context expected by
// TestErrAddCallingContext ("do_raise11: do_raise1: 1").
func do_raise11() {
do_raise1()
}
// TestErrAddCallingContext checks that erraddcallingcontext prefixes a caught
// error with the names of the functions between the test and the raise.
func TestErrAddCallingContext(t *testing.T) {
	here := myfuncname()
	const want = "do_raise11: do_raise1: 1"

	defer errcatch(func(e *Error) {
		got := erraddcallingcontext(here, e).Error()
		if got != want {
			t.Fatalf("err + calling context: %q ; want %q", got, want)
		}
	})

	do_raise11()
	t.Fatal("error not caught")
}
This diff is collapsed.
This diff is collapsed.
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"syscall"
"testing"
)
// xgetcwd returns the current working directory.
// The test is failed immediately if it cannot be determined.
func xgetcwd(t *testing.T) string {
	dir, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	return dir
}
// xchdir changes the current working directory to dir.
// The test is failed immediately if that is not possible.
func xchdir(t *testing.T, dir string) {
	if err := os.Chdir(dir); err != nil {
		t.Fatal(err)
	}
}
// TestPullRestore verifies pull -> restore end-to-end:
//
//	- testdata/1 is pulled into a fresh bare backup repository, which is then
//	  pruned and fsck'ed;
//	- the backup is restored into a work directory and compared with the
//	  original: plain files via git-diff, git repositories via their refs
//	  and reachable objects;
//	- pulling corrupt testdata/2 must raise.
//
// Errors raised by git-backup code are caught and reported as a test error
// with calling context and file:line inside this function.
func TestPullRestore(t *testing.T) {
	// if something raises -> don't let testing panic - report it as proper error with context.
	here := myfuncname()
	defer errcatch(func(e *Error) {
		e = erraddcallingcontext(here, e)

		// add file:line for failing code inside testing function - so we have exact context to debug
		failedat := ""
		for _, f := range xtraceback(1) {
			if f.Name() == here {
				// TODO(go1.7) -> f.File, f.Line (f becomes runtime.Frame)
				file, line := f.FileLine(f.pc - 1)
				failedat = fmt.Sprintf("%s:%d", filepath.Base(file), line)
				break
			}
		}
		if failedat == "" {
			panic(fmt.Errorf("cannot lookup failedat for %s", here))
		}

		t.Errorf("%s: %v", failedat, e)
	})

	workdir, err := ioutil.TempDir("", "t-git-backup")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(workdir)

	// NOTE defers run in reverse order: we first chdir back, then remove workdir
	mydir := xgetcwd(t)
	xchdir(t, workdir)
	defer xchdir(t, mydir)

	// -test.v -> verbosity of git-backup
	if testing.Verbose() {
		verbose = 1
	} else {
		verbose = 0
	}

	// init backup repository
	xgit("init", "--bare", "backup.git")
	xchdir(t, "backup.git")

	// pull from testdata
	my1 := mydir + "/testdata/1"
	cmd_pull([]string{my1 + ":b1"})

	// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
	xgit("prune")

	// verify backup repo is all ok
	xgit("fsck")

	// verify that just pulled tag objects are now gone after pruning -
	// - they become not directly git-present. The only possibility to
	// get them back is via recreating from encoded commit objects.
	tags := []string{"11e67095628aa17b03436850e690faea3006c25d",
		"ba899e5639273a6fa4d50d684af8db1ae070351e",
		"7124713e403925bc772cd252b0dec099f3ced9c5",
		"f735011c9fcece41219729a33f7876cd8791f659"}
	for _, tag := range tags {
		gerr, _, _ := git("cat-file", "-p", tag)
		if gerr == nil {
			t.Fatalf("tag %s still present in backup.git after git-prune", tag)
		}
	}

	// restore backup
	work1 := workdir + "/1"
	cmd_restore([]string{"HEAD", "b1:" + work1})

	// verify files restored to the same as original
	gerr, diff, _ := git("diff", "--no-index", "--raw", "--exit-code", my1, work1)
	// 0 - no diff, 1 - has diff, 2 - problem
	if gerr != nil && gerr.Sys().(syscall.WaitStatus).ExitStatus() > 1 {
		t.Fatal(gerr)
	}

	gitObjectsRe := regexp.MustCompile(`\.git/objects/`)
	for _, diffline := range strings.Split(diff, "\n") {
		// :srcmode dstmode srcsha1 dstsha1 status\tpath
		_, path, err := headtail(diffline, "\t")
		if err != nil {
			t.Fatalf("restorecheck: cannot parse diff line %q", diffline)
		}
		// git objects can be represented differently (we check them later)
		if gitObjectsRe.FindString(path) != "" {
			continue
		}
		t.Fatal("restorecheck: unexpected diff:", diffline)
	}

	// verify git objects restored to the same as original
	err = filepath.Walk(my1, func(path string, info os.FileInfo, err error) error {
		// any error -> stop
		if err != nil {
			return err
		}

		// non *.git/ -- not interesting
		if !(info.IsDir() && strings.HasSuffix(path, ".git")) {
			return nil
		}

		// found git repo - check refs & objects in original and restored are exactly the same,
		var R = [2]struct{ path, reflist, revlist string }{
			{path: path},                       // original
			{path: reprefix(my1, work1, path)}, // restored
		}

		// NOTE iterate by index and work via pointer: `for _, repo := range R`
		// would hand us a copy of each element, the collected reflist/revlist
		// would be thrown away and the comparisons below would always compare
		// "" with "".
		for i := range R {
			repo := &R[i]

			// fsck just in case
			xgit("--git-dir="+repo.path, "fsck")
			// NOTE for-each-ref sorts output by refname
			repo.reflist = xgit("--git-dir="+repo.path, "for-each-ref")
			// NOTE rev-list emits objects in reverse chronological order,
			// starting from refs roots which are also ordered by refname
			repo.revlist = xgit("--git-dir="+repo.path, "rev-list", "--all", "--objects")
		}

		if R[0].reflist != R[1].reflist {
			t.Fatalf("restorecheck: %q restored with different reflist (in %q)", R[0].path, R[1].path)
		}

		if R[0].revlist != R[1].revlist {
			t.Fatalf("restorecheck: %q restored with differrent objects (in %q)", R[0].path, R[1].path)
		}

		// .git verified - no need to recurse
		return filepath.SkipDir
	})

	if err != nil {
		t.Fatal(err)
	}

	// now try to pull corrupt repo - pull should refuse if transferred pack contains bad objects
	my2 := mydir + "/testdata/2"
	func() {
		defer errcatch(func(e *Error) {
			// it is ok - pull should raise
		})
		cmd_pull([]string{my2 + ":b2"})
		t.Fatal("fetching from corrupt.git did not complain")
	}()
}
// TestRepoRefSplit checks reporef_split() against a table of
// reporef -> (repo, ref) expectations.
func TestRepoRefSplit(t *testing.T) {
	type testcase struct{ reporef, repo, ref string }
	cases := []testcase{
		{"kirr/wendelin.core.git/heads/master", "kirr/wendelin.core.git", "heads/master"},
		{"kirr/erp5.git/backup/x/master+erp5-data-notebook", "kirr/erp5.git", "backup/x/master+erp5-data-notebook"},
		{"tiwariayush/Discussion%20Forum%20.git/...", "tiwariayush/Discussion Forum .git", "..."},
		{"tiwariayush/Discussion%20Forum+.git/...", "tiwariayush/Discussion Forum+.git", "..."},
		{"tiwariayush/Discussion%2BForum+.git/...", "tiwariayush/Discussion+Forum+.git", "..."},
	}

	for _, tc := range cases {
		gotrepo, gotref := reporef_split(tc.reporef)
		if gotrepo == tc.repo && gotref == tc.ref {
			continue
		}
		t.Errorf("reporef_split(%q) -> %q %q ; want %q %q", tc.reporef, gotrepo, gotref, tc.repo, tc.ref)
	}
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Run git subprocess
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"strings"
)
// StdioRedirect tells how/whether to redirect a stdio channel of the spawned process.
type StdioRedirect int
// NOTE PIPE is deliberately first: it is the zero value and thus the mode a
// zero RunWith selects.
const (
PIPE StdioRedirect = iota // connect stdio channel via PIPE to parent (default value)
DontRedirect // _git wires the channel to os.Stdout / os.Stderr of the parent
)
// RunWith describes how _git should run the git subprocess.
type RunWith struct {
stdin string // fed to subprocess stdin when non-empty
stdout StdioRedirect // PIPE | DontRedirect
stderr StdioRedirect // PIPE | DontRedirect
raw bool // !raw -> stdout, stderr are stripped
env map[string]string // !nil -> subprocess environment setup from env
}
// _git runs `git *argv` as configured by ctx -> error, stdout, stderr
//
// err is whatever running the command returned. A stream is captured only in
// PIPE mode; with DontRedirect it is wired to the corresponding stream of the
// parent and the returned string is empty. Unless ctx.raw is set, captured
// output is returned with surrounding whitespace trimmed.
// An invalid redirect mode causes panic.
func _git(argv []string, ctx RunWith) (err error, stdout, stderr string) {
	debugf("git %s", strings.Join(argv, " "))

	var outbuf, errbuf bytes.Buffer
	cmd := exec.Command("git", argv...)

	if ctx.stdin != "" {
		cmd.Stdin = strings.NewReader(ctx.stdin)
	}

	switch ctx.stdout {
	case PIPE:
		cmd.Stdout = &outbuf
	case DontRedirect:
		cmd.Stdout = os.Stdout
	default:
		panic("git: stdout redirect mode invalid")
	}

	switch ctx.stderr {
	case PIPE:
		cmd.Stderr = &errbuf
	case DontRedirect:
		cmd.Stderr = os.Stderr
	default:
		panic("git: stderr redirect mode invalid")
	}

	// non-nil env (even empty) fully replaces the inherited environment
	if ctx.env != nil {
		environ := make([]string, 0, len(ctx.env))
		for name, value := range ctx.env {
			environ = append(environ, name+"="+value)
		}
		cmd.Env = environ
	}

	err = cmd.Run()

	stdout, stderr = String(outbuf.Bytes()), String(errbuf.Bytes())
	if ctx.raw {
		return err, stdout, stderr
	}

	// prettify output (e.g. so that 'sha1\n' becomes 'sha1' and can be used directly)
	return err, strings.TrimSpace(stdout), strings.TrimSpace(stderr)
}
// GitError is the error a git command returned.
// It embeds the context of the git invocation (GitErrContext) together with
// the *exec.ExitError of the subprocess.
type GitError struct {
GitErrContext
*exec.ExitError
}
// GitErrContext is the context part of GitError.
// NOTE(review): the fields are populated outside of this view; presumably
// they hold the arguments and stdio of the failed git invocation - confirm
// against the code constructing GitError.
type GitErrContext struct {
argv []string
stdin string
stdout string
stderr string
}
func (e *GitError) Error() string {
msg := e.GitErrContext.Error()