Commit 624393db authored by Kirill Smelkov

Hook in git2go (cgo bindings to libgit2)

Currently, for every file -> blob and blob -> file conversion we invoke a
git subprocess (cat-file or hash-object). We also invoke a git subprocess
for every tag read/write, and likewise for commits. Spawning one
subprocess per object has very high overhead.

The ways to avoid such overhead could be:

1) for every kind of operation spawn a git service process, e.g.
   `git cat-file --batch` for reading files, and only do a request/reply
   exchange per object with it.

2) use some go library to work with git repository ourselves.

"1" can work but:

    - at present there is no counterpart of `cat-file --batch` for
      e.g. `hash-object` - i.e. we cannot write objects without quirks
      or patching git.

    - even if we add support for hashing via request/reply, as all
      requests are processed sequentially on git side by e.g. `git
      cat-file --batch`, we won't be able to leverage parallelism.

    - request/reply also has latency attached.

For "2" we have roughly the following choices:

    - use cgo bindings to libgit2   (git2go)

    - use some pure-go git library

The pure-go approach has the advantage that, by design, it avoids problems
related to the tricky CGo rules for passing pointers between C and Go. The
fact that these rules were sorted out by the Go team itself only during the
1.6 cycle

    https://github.com/golang/go/issues/12416

tells a lot. The net is full of examples where those rules were hard to get
right, and git2go in particular has a history of e.g. heap corruption (the
bug was on the Go side itself and was fixed only in 1.5)

    https://github.com/libgit2/git2go/issues/223
    https://groups.google.com/forum/#!topic/golang-nuts/Vi1HD-54BTA/discussion

However there is no good (to my knowledge) pure-go git library, and the
family of forks around github.com/speedata/gogit either:

    - works 3x slower compared to git2go

      ( or the same 3x in serial mode compared to e.g. `git cat-file --batch`,
        as in serial mode the git subservice and git2go have roughly similar performance )

    - or does not work at all (e.g. barfing out on REF_DELTA pack
      entries, etc)

So because of the 3x slowdown, the pure-go way is currently a non-starter.

Since one person from golang team cared to update git2go to properly
follow the CGo rules

    https://github.com/libgit2/git2go/pull/282

we can be relatively confident about git2go bindings quality and try to
use it.

This commit only hooks git2go into the build, into the subcommands, and into
Sha1 for conversion to/from Oid. We'll be switching individual places in the
code over to git2go incrementally in upcoming patches.

NOTE: for now we need git2go from the `next` branch for

    https://github.com/libgit2/git2go/commit/cf7553e7

The plan is to eventually switch to

    gopkg.in/libgit2/git2go.v25

once it is out.
parent fdaa4a19
......@@ -71,6 +71,8 @@ import (
"strings"
"syscall"
"time"
git "github.com/libgit2/git2go"
)
// verbose output
......@@ -354,7 +356,7 @@ type PullSpec struct {
dir, prefix string
}
func cmd_pull(argv []string) {
func cmd_pull(gb *git.Repository, argv []string) {
flags := flag.FlagSet{Usage: cmd_pull_usage}
flags.Init("", flag.ExitOnError)
flags.Parse(argv)
......@@ -377,7 +379,7 @@ func cmd_pull(argv []string) {
pullspecv = append(pullspecv, PullSpec{dir, prefix})
}
cmd_pull_(pullspecv)
cmd_pull_(gb, pullspecv)
}
// info about ref pointing to sha1
......@@ -386,7 +388,7 @@ type Ref struct {
sha1 Sha1
}
func cmd_pull_(pullspecv []PullSpec) {
func cmd_pull_(gb *git.Repository, pullspecv []PullSpec) {
// while pulling, we'll keep refs from all pulled repositories under temp
// unique work refs namespace.
backup_time := time.Now().Format("20060102-1504") // %Y%m%d-%H%M
......@@ -601,7 +603,7 @@ type RestoreSpec struct {
prefix, dir string
}
func cmd_restore(argv []string) {
func cmd_restore(gb *git.Repository, argv []string) {
flags := flag.FlagSet{Usage: cmd_restore_usage}
flags.Init("", flag.ExitOnError)
flags.Parse(argv)
......@@ -626,7 +628,7 @@ func cmd_restore(argv []string) {
restorespecv = append(restorespecv, RestoreSpec{prefix, dir})
}
cmd_restore_(HEAD, restorespecv)
cmd_restore_(gb, HEAD, restorespecv)
}
// kirr/wendelin.core.git/heads/master -> kirr/wendelin.core.git, heads/master
......@@ -714,7 +716,7 @@ func (br ByRepoPath) Search(prefix string) int {
})
}
func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) {
func cmd_restore_(gb *git.Repository, HEAD_ string, restorespecv []RestoreSpec) {
HEAD := xgitSha1("rev-parse", "--verify", HEAD_)
// read backup refs index
......@@ -878,7 +880,7 @@ func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) {
}
}
var commands = map[string]func([]string){
// commands maps a subcommand name to the function implementing it.
// Every handler receives a git2go repository handle and the subcommand's
// own arguments.
var commands = map[string]func(*git.Repository, []string){
"pull": cmd_pull,
"restore": cmd_restore,
}
......@@ -933,5 +935,9 @@ func main() {
os.Exit(1)
})
cmd(argv[1:])
// backup repository
gb, err := git.OpenRepository(".")
raiseif(err)
cmd(gb, argv[1:])
}
......@@ -21,6 +21,8 @@ import (
"strings"
"syscall"
"testing"
git "github.com/libgit2/git2go"
)
func xgetcwd(t *testing.T) string {
......@@ -82,10 +84,14 @@ func TestPullRestore(t *testing.T) {
// init backup repository
xgit("init", "--bare", "backup.git")
xchdir(t, "backup.git")
gb, err := git.OpenRepository(".")
if err != nil {
t.Fatal(err)
}
// pull from testdata
my1 := mydir + "/testdata/1"
cmd_pull([]string{my1+":b1"})
cmd_pull(gb, []string{my1+":b1"})
// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
xgit("prune")
......@@ -107,9 +113,16 @@ func TestPullRestore(t *testing.T) {
}
}
// reopen backup repository - to avoid having stale cache with present
// objects we deleted above with `git prune`
gb, err = git.OpenRepository(".")
if err != nil {
t.Fatal(err)
}
// restore backup
work1 := workdir + "/1"
cmd_restore([]string{"HEAD", "b1:"+work1})
cmd_restore(gb, []string{"HEAD", "b1:"+work1})
// verify files restored to the same as original
gerr, diff, _ := ggit("diff", "--no-index", "--raw", "--exit-code", my1, work1)
......@@ -181,7 +194,7 @@ func TestPullRestore(t *testing.T) {
defer errcatch(func(e *Error) {
// it is ok - pull should raise
})
cmd_pull([]string{my2+":b2"})
cmd_pull(gb, []string{my2+":b2"})
t.Fatal("fetching from corrupt.git did not complain")
}()
}
......
......@@ -17,6 +17,8 @@ import (
"bytes"
"encoding/hex"
"fmt"
git "github.com/libgit2/git2go"
)
const SHA1_RAWSIZE = 20
......@@ -81,3 +83,12 @@ type BySha1 []Sha1
// Len reports how many Sha1 entries p holds.
func (p BySha1) Len() int {
	return len(p)
}
// Swap exchanges the entries at indexes i and j.
func (p BySha1) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}
// Less reports whether entry i orders before entry j; entries are compared
// by their raw sha1 bytes using bytes.Compare.
func (p BySha1) Less(i, j int) bool {
	left, right := p[i].sha1[:], p[j].sha1[:]
	return bytes.Compare(left, right) < 0
}
// interoperability with git2go
// AsOid returns a *git.Oid view of sha1.
//
// The result is a pointer conversion of the address of sha1's own storage,
// so no bytes are copied and the returned Oid aliases sha1.
func (sha1 *Sha1) AsOid() *git.Oid {
	raw := &sha1.sha1
	return (*git.Oid)(raw)
}
// Sha1FromOid builds a Sha1 holding a copy of the value oid points to.
// oid is dereferenced unconditionally.
func Sha1FromOid(oid *git.Oid) Sha1 {
	return Sha1{sha1: *oid}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment