Commit 5aac4734 authored by Kirill Smelkov's avatar Kirill Smelkov

Verify tag/tree/blob encoding is consistent and always the same

In an upcoming patch we are going to switch xcommit_tree() to our own
implementation, and since this can potentially change how commits are
represented, for backward compatibility reasons we need to make sure
objects encoded as commits stay the same.

So for all kinds of objects (they are present in testdata/ repositories)
add checks that:

    - encode/decode is idempotent
    - encoding and decoding produces exactly expected sha1

One nice side effect of this is that we can now remove the runtime
consistency check from the tail of decoding. That check was there from the
beginning - from 6f237f22 (git-backup: Initial draft) - and was mainly present
because there was no testsuite at that time. The placement of that check is,
however, not even completely right - in case we somehow wrongly pulled an
object, it has to be detected at pull time, not restore time. So that check
was checking only 1/2 of the implementation - and not the main one - that
decoding does not mess up.

Since we now have a proper testsuite and add encode/decode tests in this
patch, we can remove that partial runtime check. And even if decoding
messes something up, despite being covered by the testsuite, it will be 100%
caught by the restore process, because for an extracted repository, if
some object which needs to be present in it is missing, pack generation
for that repository will fail. So we can be safe with the removal.

Time for restoring kirr/slapos.git from lab.nexedi.com backup

before: 5.5s
after:  3.5s

( so much because there are ~ 500 tags in slapos.git and currently tag
  encoding is done by spawning a separate subprocess per tag )
parent dbf86b19
...@@ -288,7 +288,10 @@ func obj_represent_as_commit(g *git.Repository, sha1 Sha1, obj_type string) Sha1 ...@@ -288,7 +288,10 @@ func obj_represent_as_commit(g *git.Repository, sha1 Sha1, obj_type string) Sha1
// recreate tag/tree/blob from specially crafted commit // recreate tag/tree/blob from specially crafted commit
// (see obj_represent_as_commit() about how a objects are originally translated into commit) // (see obj_represent_as_commit() about how a objects are originally translated into commit)
func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) { // returns:
// - tag: recreated object sha1
// - tree/blob: null sha1
func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) Sha1 {
xraise := func(info interface{}) { raise(&RecreateObjError{commit_sha1, info}) } xraise := func(info interface{}) { raise(&RecreateObjError{commit_sha1, info}) }
xraisef := func(f string, a ...interface{}) { xraise(fmt.Sprintf(f, a...)) } xraisef := func(f string, a ...interface{}) { xraise(fmt.Sprintf(f, a...)) }
...@@ -311,7 +314,7 @@ func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) { ...@@ -311,7 +314,7 @@ func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) {
// for tree/blob we do not need to do anything - that objects were reachable // for tree/blob we do not need to do anything - that objects were reachable
// from commit and are present in git db. // from commit and are present in git db.
if obj_type == "tree" || obj_type == "blob" { if obj_type == "tree" || obj_type == "blob" {
return return Sha1{}
} }
// re-create tag object // re-create tag object
...@@ -332,11 +335,7 @@ func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) { ...@@ -332,11 +335,7 @@ func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) {
obj_recreate_from_commit(g, Sha1FromOid(commit.ParentId(0))) obj_recreate_from_commit(g, Sha1FromOid(commit.ParentId(0)))
} }
// verify consistency via re-encoding tag again return tag_sha1
commit_sha1_ := obj_represent_as_commit(g, tag_sha1, "tag")
if commit_sha1_ != commit_sha1 {
xraisef("encoded tag corrupt (reencoded as %s)", commit_sha1_)
}
} }
type RecreateObjError struct { type RecreateObjError struct {
......
...@@ -40,6 +40,14 @@ func xchdir(t *testing.T, dir string) { ...@@ -40,6 +40,14 @@ func xchdir(t *testing.T, dir string) {
} }
} }
// XSha1 converts s to Sha1 via Sha1Parse and returns the result.
//
// It is a must-style helper: if Sha1Parse reports an error, XSha1 panics
// with that error instead of returning it.
func XSha1(s string) Sha1 {
	parsed, err := Sha1Parse(s)
	if err == nil {
		return parsed
	}
	// s could not be parsed - abort loudly
	panic(err)
}
// verify end-to-end pull-restore // verify end-to-end pull-restore
func TestPullRestore(t *testing.T) { func TestPullRestore(t *testing.T) {
// if something raises -> don't let testing panic - report it as proper error with context. // if something raises -> don't let testing panic - report it as proper error with context.
...@@ -93,6 +101,43 @@ func TestPullRestore(t *testing.T) { ...@@ -93,6 +101,43 @@ func TestPullRestore(t *testing.T) {
my1 := mydir + "/testdata/1" my1 := mydir + "/testdata/1"
cmd_pull(gb, []string{my1+":b1"}) cmd_pull(gb, []string{my1+":b1"})
// verify tag/tree/blob encoding is 1) consistent and 2) always the same.
// we need it be always the same so different git-backup versions can
// interoperate with each other.
var noncommitv = []struct{
sha1 Sha1 // original
sha1_ Sha1 // encoded
istag bool // is original object a tag object
}{
{XSha1("f735011c9fcece41219729a33f7876cd8791f659"), XSha1("4f2486e99ff9744751e0756b155e57bb24c453dd"), true}, // tag-to-commit
{XSha1("7124713e403925bc772cd252b0dec099f3ced9c5"), XSha1("6b3beabee3e0704fa3269558deab01e9d5d7764e"), true}, // tag-to-tag
{XSha1("11e67095628aa17b03436850e690faea3006c25d"), XSha1("89ad5fbeb9d3f0c7bc6366855a09484819289911"), true}, // tag-to-blob
{XSha1("ba899e5639273a6fa4d50d684af8db1ae070351e"), XSha1("68ad6a7c31042e53201e47aee6096ed081b6fdb9"), true}, // tag-to-tree
{XSha1("61882eb85774ed4401681d800bb9c638031375e2"), XSha1("761f55bcdf119ced3fcf23b69fdc169cbb5fc143"), false}, // ref-to-tree
{XSha1("7a3343f584218e973165d943d7c0af47a52ca477"), XSha1("366f3598d662909e2537481852e42b775c7eb837"), false}, // ref-to-blob
}
for _, nc := range noncommitv {
// encoded object should be already present
_, err := ReadObject2(gb, nc.sha1_)
if err != nil {
t.Fatalf("encode %s should give %s but expected encoded object not found: %s", nc.sha1, nc.sha1_, err)
}
// decoding encoded object should give original sha1, if it was tag
sha1 := obj_recreate_from_commit(gb, nc.sha1_)
if nc.istag && sha1 != nc.sha1 {
t.Fatalf("decode %s -> %s ; want %s", nc.sha1_, sha1, nc.sha1)
}
// encoding original object should give sha1_
obj_type := xgit("cat-file", "-t", nc.sha1)
sha1_ := obj_represent_as_commit(gb, nc.sha1, obj_type)
if sha1_ != nc.sha1_ {
t.Fatalf("encode %s -> %s ; want %s", sha1, sha1_, nc.sha1_)
}
}
// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits) // prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
xgit("prune") xgit("prune")
...@@ -102,14 +147,13 @@ func TestPullRestore(t *testing.T) { ...@@ -102,14 +147,13 @@ func TestPullRestore(t *testing.T) {
// verify that just pulled tag objects are now gone after pruning - // verify that just pulled tag objects are now gone after pruning -
// - they become not directly git-present. The only possibility to // - they become not directly git-present. The only possibility to
// get them back is via recreating from encoded commit objects. // get them back is via recreating from encoded commit objects.
tags := []string{"11e67095628aa17b03436850e690faea3006c25d", for _, nc := range noncommitv {
"ba899e5639273a6fa4d50d684af8db1ae070351e", if !nc.istag {
"7124713e403925bc772cd252b0dec099f3ced9c5", continue
"f735011c9fcece41219729a33f7876cd8791f659"} }
for _, tag := range tags { gerr, _, _ := ggit("cat-file", "-p", nc.sha1)
gerr, _, _ := ggit("cat-file", "-p", tag)
if gerr == nil { if gerr == nil {
t.Fatalf("tag %s still present in backup.git after git-prune", tag) t.Fatalf("tag %s still present in backup.git after git-prune", nc.sha1)
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment