Commit f3f694b9 authored by Kirill Smelkov

~gofmt

parent cc6ac54f
......@@ -67,28 +67,28 @@ NOTE the idea of pulling all refs together is similar to git-namespaces
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
pathpkg "path"
"path/filepath"
"runtime"
"runtime/debug"
"sort"
"strings"
"sync"
"syscall"
"time"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/my"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xflag"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
"flag"
"fmt"
"io/ioutil"
"os"
pathpkg "path"
"path/filepath"
"runtime"
"runtime/debug"
"sort"
"strings"
"sync"
"syscall"
"time"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/my"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xflag"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
)
// verbose output
......@@ -99,26 +99,26 @@ import (
// verbose is the output verbosity level: infof prints when it is > 0,
// gitprogress returns DontRedirect when it is > 1, and debugf prints when
// it is > 2.
var verbose = 1
// infof prints a formatted informational message, followed by a newline,
// to stdout. Nothing is printed unless verbose > 0.
func infof(format string, a ...interface{}) {
	if verbose > 0 {
		fmt.Printf(format, a...)
		fmt.Println()
	}
}
// what to pass to git subprocess to stdout/stderr
// DontRedirect - no-redirection, PIPE - output to us
//
// DontRedirect is chosen only when verbose > 1; otherwise PIPE is returned.
func gitprogress() StdioRedirect {
	if verbose > 1 {
		return DontRedirect
	}
	return PIPE
}
// debugf prints a formatted debugging message, followed by a newline,
// to stdout. Nothing is printed unless verbose > 2.
func debugf(format string, a ...interface{}) {
	if verbose > 2 {
		fmt.Printf(format, a...)
		fmt.Println()
	}
}
// how many max jobs to spawn
......@@ -128,48 +128,48 @@ var njobs = runtime.NumCPU()
// file -> blob_sha1, mode
//
// file_to_blob writes the content of the file at path into repository g as a
// blob object and returns the sha1 of that blob together with the native OS
// mode of the file. For a symbolic link the blob content is the link target
// itself, not the data the link points to.
//
// Failures are reported by raising via exc, not through a return value.
func file_to_blob(g *git.Repository, path string) (Sha1, uint32) {
	// because we want to pass mode to outside world (to e.g. `git update-index`)
	// we need to get native OS mode, not translated one as os.Lstat() would give us.
	var st syscall.Stat_t
	err := syscall.Lstat(path, &st)
	if err != nil {
		exc.Raise(&os.PathError{Op: "lstat", Path: path, Err: err})
	}

	var blob_content []byte
	if st.Mode&syscall.S_IFMT == syscall.S_IFLNK {
		// symlink: what we back up is where the link points to
		target, err := os.Readlink(path)
		exc.Raiseif(err)
		blob_content = mem.Bytes(target)
	} else {
		blob_content, err = ioutil.ReadFile(path)
		exc.Raiseif(err)
	}

	blob_sha1, err := WriteObject(g, blob_content, git.ObjectBlob)
	exc.Raiseif(err)

	return blob_sha1, st.Mode
}
// blob_sha1, mode -> file
//
// blob_to_file reads blob blob_sha1 from repository g and materializes it at
// path, creating missing parent directories first. mode is the native OS mode:
// when it denotes a symbolic link, the blob content is used as the link
// target; otherwise the content is written out via writefile with that mode.
//
// Failures are reported by raising via exc, not through a return value.
func blob_to_file(g *git.Repository, blob_sha1 Sha1, mode uint32, path string) {
	blob, err := ReadObject(g, blob_sha1, git.ObjectBlob)
	exc.Raiseif(err)
	blob_content := blob.Data()

	err = os.MkdirAll(pathpkg.Dir(path), 0777)
	exc.Raiseif(err)

	if mode&syscall.S_IFMT == syscall.S_IFLNK {
		err = os.Symlink(mem.String(blob_content), path)
		exc.Raiseif(err)
	} else {
		// NOTE mode is native - we cannot use ioutil.WriteFile() directly
		err = writefile(path, blob_content, mode)
		exc.Raiseif(err)
	}
}
// -------- tags representation --------
......@@ -185,74 +185,74 @@ func blob_to_file(g *git.Repository, blob_sha1 Sha1, mode uint32, path string) {
// object and tagged object is kept there in repo thanks to it being reachable
// through created commit.
//
// sha1 and obj_type identify the object to encode; only tag, tree and blob
// are accepted, anything else is raised as an error. The returned sha1 is
// that of the commit created to represent the object.
func obj_represent_as_commit(g *git.Repository, sha1 Sha1, obj_type git.ObjectType) Sha1 {
	switch obj_type {
	case git.ObjectTag, git.ObjectTree, git.ObjectBlob:
		// ok
	default:
		exc.Raisef("%s (%s): cannot encode as commit", sha1, obj_type)
	}

	// first line in commit msg = object type
	obj_encoded := gittypestr(obj_type) + "\n"
	var tagged_type git.ObjectType
	var tagged_sha1 Sha1

	// below the code layout is mainly for tag type, and we hook tree and blob
	// types handling into that layout
	if obj_type == git.ObjectTag {
		tag, tag_obj := xload_tag(g, sha1)
		tagged_type = tag.tagged_type
		tagged_sha1 = tag.tagged_sha1
		obj_encoded += mem.String(tag_obj.Data())
	} else {
		// for tree/blob we only care that object stays reachable
		tagged_type = obj_type
		tagged_sha1 = sha1
	}

	// all commits we do here - we do with fixed name/date, so transformation
	// tag->commit is stable wrt git environment and time change
	fixed := AuthorInfo{Name: "Git backup", Email: "git@backup.org", When: time.Unix(0, 0).UTC()}
	zcommit_tree := func(tree Sha1, parents []Sha1, msg string) Sha1 {
		return xcommit_tree2(g, tree, parents, msg, fixed, fixed)
	}

	switch tagged_type {
	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> ø
	// Commit             .parent -> Commit
	case git.ObjectCommit:
		return zcommit_tree(mktree_empty(), []Sha1{tagged_sha1}, obj_encoded)

	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> Tree
	// Tree               .parent -> ø
	case git.ObjectTree:
		return zcommit_tree(tagged_sha1, []Sha1{}, obj_encoded)

	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> Tree* "tagged" -> Blob
	// Blob               .parent -> ø
	case git.ObjectBlob:
		tree_for_blob := xgitSha1("mktree", RunWith{stdin: fmt.Sprintf("100644 blob %s\ttagged\n", tagged_sha1)})
		return zcommit_tree(tree_for_blob, []Sha1{}, obj_encoded)

	// Tag₂       ~>     Commit₂*
	//  |                 .msg:      Tag₂
	//  v                 .tree   -> ø
	// Tag₁               .parent -> Commit₁*
	case git.ObjectTag:
		// tag of a tag: encode the inner tag first and chain to it via parent
		commit1 := obj_represent_as_commit(g, tagged_sha1, tagged_type)
		return zcommit_tree(mktree_empty(), []Sha1{commit1}, obj_encoded)
	}

	exc.Raisef("%s (%q): unknown tagged type", sha1, tagged_type)
	panic(0) // the function needs a terminating statement after the raise
}
// recreate tag/tree/blob from specially crafted commit
......@@ -261,68 +261,68 @@ func obj_represent_as_commit(g *git.Repository, sha1 Sha1, obj_type git.ObjectTy
// - tag: recreated object sha1
// - tree/blob: null sha1
//
// Problems found in the encoding commit itself (lookup failure, more than one
// parent, malformed message, unexpected object type, unparsable or corrupt
// tag) are raised as *RecreateObjError; a failure to write the tag object is
// raised as is.
func obj_recreate_from_commit(g *git.Repository, commit_sha1 Sha1) Sha1 {
	xraise := func(info interface{}) { exc.Raise(&RecreateObjError{commit_sha1, info}) }
	xraisef := func(f string, a ...interface{}) { xraise(fmt.Sprintf(f, a...)) }

	commit, err := g.LookupCommit(commit_sha1.AsOid())
	if err != nil {
		xraise(err)
	}
	if commit.ParentCount() > 1 {
		xraise(">1 parents")
	}

	// commit message = "<object type>\n<raw object data>"
	obj_type, obj_raw, err := xstrings.HeadTail(commit.Message(), "\n")
	if err != nil {
		xraise("invalid encoded format")
	}
	switch obj_type {
	case "tag", "tree", "blob":
		// ok
	default:
		xraisef("unexpected encoded object type %q", obj_type)
	}

	// for tree/blob we do not need to do anything - that objects were reachable
	// from commit and are present in git db.
	if obj_type == "tree" || obj_type == "blob" {
		return Sha1{}
	}

	// re-create tag object
	tag_sha1, err := WriteObject(g, mem.Bytes(obj_raw), git.ObjectTag)
	exc.Raiseif(err)

	// the original tagged object should be already in repository, because we
	// always attach it to encoding commit one way or another,
	// except we need to recurse, if it was Tag₂->Tag₁
	tag, err := tag_parse(obj_raw)
	if err != nil {
		xraisef("encoded tag: %s", err)
	}
	if tag.tagged_type == git.ObjectTag {
		if commit.ParentCount() == 0 {
			xraise("encoded tag corrupt (tagged is tag but []parent is empty)")
		}
		obj_recreate_from_commit(g, Sha1FromOid(commit.ParentId(0)))
	}

	return tag_sha1
}
// RecreateObjError is the error raised by obj_recreate_from_commit when an
// object cannot be recreated from its encoding commit.
type RecreateObjError struct {
	commit_sha1 Sha1        // encoding commit that was being processed
	info        interface{} // what went wrong: an error or a message string
}
// Error implements the error interface: it reports the encoding commit
// together with the attached problem description.
func (e *RecreateObjError) Error() string {
	return fmt.Sprintf("commit %s: %s", e.commit_sha1, e.info)
}
// -------- git-backup pull --------
func cmd_pull_usage() {
fmt.Fprint(os.Stderr,
fmt.Fprint(os.Stderr,
`git-backup pull <dir1>:<prefix1> <dir2>:<prefix2> ...
Pull bare Git repositories & just files from dir1 into backup prefix1,
......@@ -331,333 +331,332 @@ from dir2 into backup prefix2, etc...
}
// PullSpec is one parsed `<dir>:<prefix>` argument of `git-backup pull`:
// what is found under dir is pulled into the backup under prefix.
type PullSpec struct {
	dir, prefix string
}
// cmd_pull is the command-line entry point of `git-backup pull`.
//
// argv holds the arguments after the subcommand name. Each positional
// argument must be a `<dir>:<prefix>` pullspec; with no pullspec, or with a
// malformed one, usage is printed and the program exits with status 1.
// The parsed pullspecs are handed to cmd_pull_ together with backup
// repository gb.
func cmd_pull(gb *git.Repository, argv []string) {
	flags := flag.FlagSet{Usage: cmd_pull_usage}
	flags.Init("", flag.ExitOnError)
	flags.Parse(argv) // with ExitOnError a parse failure exits the program
	argv = flags.Args()

	if len(argv) < 1 {
		cmd_pull_usage()
		os.Exit(1)
	}

	pullspecv := make([]PullSpec, 0, len(argv))
	for _, arg := range argv {
		dir, prefix, err := xstrings.Split2(arg, ":")
		if err != nil {
			fmt.Fprintf(os.Stderr, "E: invalid pullspec %q\n", arg)
			cmd_pull_usage()
			os.Exit(1)
		}
		pullspecv = append(pullspecv, PullSpec{dir, prefix})
	}

	cmd_pull_(gb, pullspecv)
}
// Ref is info about a reference pointing to sha1.
type Ref struct {
	name string // reference name without "refs/" prefix
	sha1 Sha1   // object the reference points to
}
func cmd_pull_(gb *git.Repository, pullspecv []PullSpec) {
// while pulling, we'll keep refs from all pulled repositories under temp
// unique work refs namespace.
backup_time := time.Now().Format("20060102-1504") // %Y%m%d-%H%M
backup_refs_work := fmt.Sprintf("refs/backup/%s/", backup_time) // refs/backup/20150820-2109/
backup_lock := "refs/backup.locked"
// make sure another `git-backup pull` is not running
xgit("update-ref", backup_lock, mktree_empty(), Sha1{})
// make sure there is root commit
var HEAD Sha1
var err error
gerr, __, _ := ggit("rev-parse", "--verify", "HEAD")
if gerr != nil {
infof("# creating root commit")
// NOTE `git commit` does not work in bare repo - do commit by hand
HEAD = xcommit_tree(gb, mktree_empty(), []Sha1{}, "Initialize git-backup repository")
xgit("update-ref", "-m", "git-backup pull init", "HEAD", HEAD)
} else {
HEAD, err = Sha1Parse(__)
exc.Raiseif(err)
}
// build index of "already-have" objects: all commits + tag/tree/blob that
// were at heads of already pulled repositories.
//
// Build it once and use below to check ourselves whether a head from a pulled
// repository needs to be actually fetched. If we don't, `git fetch-pack`
// will do similar to "all commits" linear scan for every pulled repository,
// which are many out there.
alreadyHave := Sha1Set{}
infof("# building \"already-have\" index")
// already have: all commits
//
// As of lab.nexedi.com/20180612 there are ~ 1.7·10⁷ objects total in backup.
// Of those there are ~ 1.9·10⁶ commit objects, i.e. ~10% of total.
// Since 1 sha1 is 2·10¹ bytes, the space needed for keeping sha1 of all
// commits is ~ 4·10⁷B = ~40MB. It is thus ok to keep this index in RAM for now.
for _, __ := range xstrings.SplitLines(xgit("rev-list", HEAD), "\n") {
sha1, err := Sha1Parse(__)
exc.Raiseif(err)
alreadyHave.Add(sha1)
}
// already have: tag/tree/blob that were at heads of already pulled repositories
//
// As of lab.nexedi.com/20180612 there are ~ 8.4·10⁴ refs in total.
// Of those encoded tag/tree/blob are ~ 3.2·10⁴, i.e. ~40% of total.
// The number of tag/tree/blob objects in alreadyHave is thus negligible
// compared to the number of "all commits".
hcommit, err := gb.LookupCommit(HEAD.AsOid())
exc.Raiseif(err)
htree, err := hcommit.Tree()
exc.Raiseif(err)
if htree.EntryByName("backup.refs") != nil {
repotab, err := loadBackupRefs(fmt.Sprintf("%s:backup.refs", HEAD))
exc.Raiseif(err)
for _, repo := range repotab {
for _, xref := range repo.refs {
if xref.sha1 != xref.sha1_ && !alreadyHave.Contains(xref.sha1) {
// make sure encoded tag/tree/blob objects represented as
// commits are present. We do so, because we promise to
// fetch that all objects in alreadyHave are present.
obj_recreate_from_commit(gb, xref.sha1_)
alreadyHave.Add(xref.sha1)
}
}
}
}
// walk over specified dirs, pulling objects from git and blobbing non-git-object files
blobbedv := []string{} // info about file pulled to blob, and not yet added to index
for _, __ := range pullspecv {
dir, prefix := __.dir, __.prefix
// make sure index is empty for prefix (so that we start from clean
// prefix namespace and this way won't leave stale removed things)
xgit("rm", "--cached", "-r", "--ignore-unmatch", "--", prefix)
here := my.FuncName()
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) (errout error) {
if err != nil {
if os.IsNotExist(err) {
// a file or directory was removed in parallel to us scanning the tree.
infof("Warning: Skipping %s: %s", path, err)
return nil
}
// any other error -> stop
return err
}
// propagate exceptions properly via filepath.Walk as errors with calling context
// (filepath is not our code)
defer exc.Catch(func(e *exc.Error) {
errout = exc.Addcallingcontext(here, e)
})
// files -> blobs + queue info for adding blobs to index
if !info.IsDir() {
infof("# file %s\t<- %s", prefix, path)
blob, mode := file_to_blob(gb, path)
blobbedv = append(blobbedv,
fmt.Sprintf("%o %s\t%s", mode, blob, reprefix(dir, prefix, path)))
return nil
}
// directories -> look for *.git and handle git object specially.
// do not recurse into *.git/objects/ - we'll save them specially
if strings.HasSuffix(path, ".git/objects") {
return filepath.SkipDir
}
// else we recurse, but handle *.git specially - via fetching objects from it
if !strings.HasSuffix(path, ".git") {
return nil
}
// git repo - let's pull all refs from it to our backup refs namespace
infof("# git %s\t<- %s", prefix, path)
refv, _, err := fetch(path, alreadyHave)
exc.Raiseif(err)
// TODO don't store to git references all references from fetched repository:
//
// We need to store to git references only references that were actually
// fetched - so that next fetch, e.g. from a fork that also has new data
// as its upstream, won't have to transfer what we just have fetched
// from upstream.
//
// For this purpose we can also save references by naming them as their
// sha1, not actual name, which will automatically deduplicate them in
// between several repositories, especially when/if pull will be made to
// work in parallel.
//
// Such changed-only deduplicated references should be O(δ) - usually only
// a few, and this way we will also automatically avoid O(n^2) behaviour
// of every git fetch scanning all local references at its startup.
//
// For backup.refs, we can generate it directly from refv of all fetched
// repositories saved in RAM.
reporefprefix := backup_refs_work +
// NOTE repo name is escaped as it can contain e.g. spaces, and refs must not
path_refescape(reprefix(dir, prefix, path))
for _, ref := range refv {
err = mkref(gb, reporefprefix + "/" + ref.name, ref.sha1)
exc.Raiseif(err)
}
// XXX do we want to do full fsck of source git repo on pull as well ?
return nil
})
// re-raise / raise error after Walk
if err != nil {
e := exc.Aserror(err)
e = exc.Addcontext(e, "pulling from "+dir)
exc.Raise(e)
}
}
// add to index files we converted to blobs
xgit("update-index", "--add", "--index-info", RunWith{stdin: strings.Join(blobbedv, "\n")})
// all refs from all found git repositories populated.
// now prepare manifest with ref -> sha1 and do a synthetic commit merging all that sha1
// (so they become all reachable from HEAD -> survive repack and be transferable on git pull)
//
// NOTE we handle tag/tree/blob objects specially - because these objects cannot
// be in commit parents, we convert them to specially-crafted commits and use them.
// The commits prepared contain full info how to restore original objects.
// backup.refs format:
//
// 1eeb0324 <prefix>/wendelin.core.git/heads/master
// 213a9243 <prefix>/wendelin.core.git/tags/v0.4 <213a9243-converted-to-commit>
// ...
//
// NOTE `git for-each-ref` sorts output by ref
// -> backup_refs is sorted and stable between runs
backup_refs_dump := xgit("for-each-ref", backup_refs_work)
backup_refs_list := []Ref{} // parsed dump
backup_refsv := []string{} // backup.refs content
backup_refs_parents := Sha1Set{} // sha1 for commit parents, obtained from refs
noncommit_seen := map[Sha1]Sha1{} // {} sha1 -> sha1_ (there are many duplicate tags)
for _, __ := range xstrings.SplitLines(backup_refs_dump, "\n") {
sha1, type_, ref := Sha1{}, "", ""
_, err := fmt.Sscanf(__, "%s %s %s\n", &sha1, &type_, &ref)
if err != nil {
exc.Raisef("%s: strange for-each-ref entry %q", backup_refs_work, __)
}
backup_refs_list = append(backup_refs_list, Ref{ref, sha1})
backup_refs_entry := fmt.Sprintf("%s %s", sha1, strip_prefix(backup_refs_work, ref))
// represent tag/tree/blob as specially crafted commit, because we
// cannot use it as commit parent.
sha1_ := sha1
if type_ != "commit" {
//infof("obj_as_commit %s %s\t%s", sha1, type_, ref) XXX
var seen bool
sha1_, seen = noncommit_seen[sha1]
if !seen {
obj_type, ok := gittype(type_)
if !ok {
exc.Raisef("%s: invalid git type in entry %q", backup_refs_work, __)
}
sha1_ = obj_represent_as_commit(gb, sha1, obj_type)
noncommit_seen[sha1] = sha1_
}
backup_refs_entry += fmt.Sprintf(" %s", sha1_)
}
backup_refsv = append(backup_refsv, backup_refs_entry)
if !backup_refs_parents.Contains(sha1_) { // several refs can refer to the same sha1
backup_refs_parents.Add(sha1_)
}
}
backup_refs := strings.Join(backup_refsv, "\n")
backup_refs_parentv := backup_refs_parents.Elements()
sort.Sort(BySha1(backup_refs_parentv)) // so parents order is stable in between runs
// backup_refs -> blob
backup_refs_sha1 := xgitSha1("hash-object", "-w", "--stdin", RunWith{stdin: backup_refs})
// add backup_refs blob to index
xgit("update-index", "--add", "--cacheinfo", fmt.Sprintf("100644,%s,backup.refs", backup_refs_sha1))
// index is ready - prepare tree and commit
backup_tree_sha1 := xgitSha1("write-tree")
commit_sha1 := xcommit_tree(gb, backup_tree_sha1, append([]Sha1{HEAD}, backup_refs_parentv...),
"Git-backup " + backup_time)
xgit("update-ref", "-m", "git-backup pull", "HEAD", commit_sha1, HEAD)
// remove no-longer needed backup refs & verify they don't stay
backup_refs_delete := ""
for _, ref := range backup_refs_list {
backup_refs_delete += fmt.Sprintf("delete %s %s\n", ref.name, ref.sha1)
}
xgit("update-ref", "--stdin", RunWith{stdin: backup_refs_delete})
__ = xgit("for-each-ref", backup_refs_work)
if __ != "" {
exc.Raisef("Backup refs under %s not deleted properly", backup_refs_work)
}
// NOTE `delete` deletes only files, but leaves empty dirs around.
// more important: this affect performance of future `git-backup pull` run a *LOT*
//
// reason is: `git pull` first check local refs, and for doing so it
// recourse into all directories, even empty ones.
//
// https://lab.nexedi.com/lab.nexedi.com/lab.nexedi.com/issues/4
//
// So remove all dirs under backup_refs_work prefix in the end.
//
// TODO Revisit this when reworking fetch to be parallel. Reason is: in
// the process of pulling repositories, the more references we
// accumulate, the longer pull starts to be, so it becomes O(n^2).
//
// -> what to do is described nearby fetch/mkref call.
gitdir := xgit("rev-parse", "--git-dir")
err = os.RemoveAll(gitdir+"/"+backup_refs_work)
exc.Raiseif(err) // NOTE err is nil if path does not exist
// if we have working copy - update it
bare := xgit("rev-parse", "--is-bare-repository")
if bare != "true" {
// `git checkout-index -af` -- does not delete deleted files
// `git read-tree -v -u --reset HEAD~ HEAD` -- needs index matching
// original worktree to properly work, but we already have updated index
//
// so we get changes we committed as diff and apply to worktree
diff := xgit("diff", "--binary", HEAD, "HEAD", RunWith{raw: true})
if diff != "" {
diffstat := xgit("apply", "--stat", "--apply", "--binary", "--whitespace=nowarn",
RunWith{stdin: diff, raw: true})
infof("%s", diffstat)
}
}
// we are done - unlock
xgit("update-ref", "-d", backup_lock)
// while pulling, we'll keep refs from all pulled repositories under temp
// unique work refs namespace.
backup_time := time.Now().Format("20060102-1504") // %Y%m%d-%H%M
backup_refs_work := fmt.Sprintf("refs/backup/%s/", backup_time) // refs/backup/20150820-2109/
backup_lock := "refs/backup.locked"
// make sure another `git-backup pull` is not running
xgit("update-ref", backup_lock, mktree_empty(), Sha1{})
// make sure there is root commit
var HEAD Sha1
var err error
gerr, __, _ := ggit("rev-parse", "--verify", "HEAD")
if gerr != nil {
infof("# creating root commit")
// NOTE `git commit` does not work in bare repo - do commit by hand
HEAD = xcommit_tree(gb, mktree_empty(), []Sha1{}, "Initialize git-backup repository")
xgit("update-ref", "-m", "git-backup pull init", "HEAD", HEAD)
} else {
HEAD, err = Sha1Parse(__)
exc.Raiseif(err)
}
// build index of "already-have" objects: all commits + tag/tree/blob that
// were at heads of already pulled repositories.
//
// Build it once and use below to check ourselves whether a head from a pulled
// repository needs to be actually fetched. If we don't, `git fetch-pack`
// will do similar to "all commits" linear scan for every pulled repository,
// which are many out there.
alreadyHave := Sha1Set{}
infof("# building \"already-have\" index")
// already have: all commits
//
// As of lab.nexedi.com/20180612 there are ~ 1.7·10⁷ objects total in backup.
// Of those there are ~ 1.9·10⁶ commit objects, i.e. ~10% of total.
// Since 1 sha1 is 2·10¹ bytes, the space needed for keeping sha1 of all
// commits is ~ 4·10⁷B = ~40MB. It is thus ok to keep this index in RAM for now.
for _, __ := range xstrings.SplitLines(xgit("rev-list", HEAD), "\n") {
sha1, err := Sha1Parse(__)
exc.Raiseif(err)
alreadyHave.Add(sha1)
}
// already have: tag/tree/blob that were at heads of already pulled repositories
//
// As of lab.nexedi.com/20180612 there are ~ 8.4·10⁴ refs in total.
// Of those encoded tag/tree/blob are ~ 3.2·10⁴, i.e. ~40% of total.
// The number of tag/tree/blob objects in alreadyHave is thus negligible
// compared to the number of "all commits".
hcommit, err := gb.LookupCommit(HEAD.AsOid())
exc.Raiseif(err)
htree, err := hcommit.Tree()
exc.Raiseif(err)
if htree.EntryByName("backup.refs") != nil {
repotab, err := loadBackupRefs(fmt.Sprintf("%s:backup.refs", HEAD))
exc.Raiseif(err)
for _, repo := range repotab {
for _, xref := range repo.refs {
if xref.sha1 != xref.sha1_ && !alreadyHave.Contains(xref.sha1) {
// make sure encoded tag/tree/blob objects represented as
// commits are present. We do so, because we promise to
// fetch that all objects in alreadyHave are present.
obj_recreate_from_commit(gb, xref.sha1_)
alreadyHave.Add(xref.sha1)
}
}
}
}
// walk over specified dirs, pulling objects from git and blobbing non-git-object files
blobbedv := []string{} // info about file pulled to blob, and not yet added to index
for _, __ := range pullspecv {
dir, prefix := __.dir, __.prefix
// make sure index is empty for prefix (so that we start from clean
// prefix namespace and this way won't leave stale removed things)
xgit("rm", "--cached", "-r", "--ignore-unmatch", "--", prefix)
here := my.FuncName()
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) (errout error) {
if err != nil {
if os.IsNotExist(err) {
// a file or directory was removed in parallel to us scanning the tree.
infof("Warning: Skipping %s: %s", path, err)
return nil
}
// any other error -> stop
return err
}
// propagate exceptions properly via filepath.Walk as errors with calling context
// (filepath is not our code)
defer exc.Catch(func(e *exc.Error) {
errout = exc.Addcallingcontext(here, e)
})
// files -> blobs + queue info for adding blobs to index
if !info.IsDir() {
infof("# file %s\t<- %s", prefix, path)
blob, mode := file_to_blob(gb, path)
blobbedv = append(blobbedv,
fmt.Sprintf("%o %s\t%s", mode, blob, reprefix(dir, prefix, path)))
return nil
}
// directories -> look for *.git and handle git object specially.
// do not recurse into *.git/objects/ - we'll save them specially
if strings.HasSuffix(path, ".git/objects") {
return filepath.SkipDir
}
// else we recurse, but handle *.git specially - via fetching objects from it
if !strings.HasSuffix(path, ".git") {
return nil
}
// git repo - let's pull all refs from it to our backup refs namespace
infof("# git %s\t<- %s", prefix, path)
refv, _, err := fetch(path, alreadyHave)
exc.Raiseif(err)
// TODO don't store to git references all references from fetched repository:
//
// We need to store to git references only references that were actually
// fetched - so that next fetch, e.g. from a fork that also has new data
// as its upstream, won't have to transfer what we just have fetched
// from upstream.
//
// For this purpose we can also save references by naming them as their
// sha1, not actual name, which will automatically deduplicate them in
// between several repositories, especially when/if pull will be made to
// work in parallel.
//
// Such changed-only deduplicated references should be O(δ) - usually only
// a few, and this way we will also automatically avoid O(n^2) behaviour
// of every git fetch scanning all local references at its startup.
//
// For backup.refs, we can generate it directly from refv of all fetched
// repositories saved in RAM.
reporefprefix := backup_refs_work +
// NOTE repo name is escaped as it can contain e.g. spaces, and refs must not
path_refescape(reprefix(dir, prefix, path))
for _, ref := range refv {
err = mkref(gb, reporefprefix+"/"+ref.name, ref.sha1)
exc.Raiseif(err)
}
// XXX do we want to do full fsck of source git repo on pull as well ?
return nil
})
// re-raise / raise error after Walk
if err != nil {
e := exc.Aserror(err)
e = exc.Addcontext(e, "pulling from "+dir)
exc.Raise(e)
}
}
// add to index files we converted to blobs
xgit("update-index", "--add", "--index-info", RunWith{stdin: strings.Join(blobbedv, "\n")})
// all refs from all found git repositories populated.
// now prepare manifest with ref -> sha1 and do a synthetic commit merging all that sha1
// (so they become all reachable from HEAD -> survive repack and be transferable on git pull)
//
// NOTE we handle tag/tree/blob objects specially - because these objects cannot
// be in commit parents, we convert them to specially-crafted commits and use them.
// The commits prepared contain full info how to restore original objects.
// backup.refs format:
//
// 1eeb0324 <prefix>/wendelin.core.git/heads/master
// 213a9243 <prefix>/wendelin.core.git/tags/v0.4 <213a9243-converted-to-commit>
// ...
//
// NOTE `git for-each-ref` sorts output by ref
// -> backup_refs is sorted and stable between runs
backup_refs_dump := xgit("for-each-ref", backup_refs_work)
backup_refs_list := []Ref{} // parsed dump
backup_refsv := []string{} // backup.refs content
backup_refs_parents := Sha1Set{} // sha1 for commit parents, obtained from refs
noncommit_seen := map[Sha1]Sha1{} // {} sha1 -> sha1_ (there are many duplicate tags)
for _, __ := range xstrings.SplitLines(backup_refs_dump, "\n") {
sha1, type_, ref := Sha1{}, "", ""
_, err := fmt.Sscanf(__, "%s %s %s\n", &sha1, &type_, &ref)
if err != nil {
exc.Raisef("%s: strange for-each-ref entry %q", backup_refs_work, __)
}
backup_refs_list = append(backup_refs_list, Ref{ref, sha1})
backup_refs_entry := fmt.Sprintf("%s %s", sha1, strip_prefix(backup_refs_work, ref))
// represent tag/tree/blob as specially crafted commit, because we
// cannot use it as commit parent.
sha1_ := sha1
if type_ != "commit" {
//infof("obj_as_commit %s %s\t%s", sha1, type_, ref) XXX
var seen bool
sha1_, seen = noncommit_seen[sha1]
if !seen {
obj_type, ok := gittype(type_)
if !ok {
exc.Raisef("%s: invalid git type in entry %q", backup_refs_work, __)
}
sha1_ = obj_represent_as_commit(gb, sha1, obj_type)
noncommit_seen[sha1] = sha1_
}
backup_refs_entry += fmt.Sprintf(" %s", sha1_)
}
backup_refsv = append(backup_refsv, backup_refs_entry)
if !backup_refs_parents.Contains(sha1_) { // several refs can refer to the same sha1
backup_refs_parents.Add(sha1_)
}
}
backup_refs := strings.Join(backup_refsv, "\n")
backup_refs_parentv := backup_refs_parents.Elements()
sort.Sort(BySha1(backup_refs_parentv)) // so parents order is stable in between runs
// backup_refs -> blob
backup_refs_sha1 := xgitSha1("hash-object", "-w", "--stdin", RunWith{stdin: backup_refs})
// add backup_refs blob to index
xgit("update-index", "--add", "--cacheinfo", fmt.Sprintf("100644,%s,backup.refs", backup_refs_sha1))
// index is ready - prepare tree and commit
backup_tree_sha1 := xgitSha1("write-tree")
commit_sha1 := xcommit_tree(gb, backup_tree_sha1, append([]Sha1{HEAD}, backup_refs_parentv...),
"Git-backup "+backup_time)
xgit("update-ref", "-m", "git-backup pull", "HEAD", commit_sha1, HEAD)
// remove no-longer needed backup refs & verify they don't stay
backup_refs_delete := ""
for _, ref := range backup_refs_list {
backup_refs_delete += fmt.Sprintf("delete %s %s\n", ref.name, ref.sha1)
}
xgit("update-ref", "--stdin", RunWith{stdin: backup_refs_delete})
__ = xgit("for-each-ref", backup_refs_work)
if __ != "" {
exc.Raisef("Backup refs under %s not deleted properly", backup_refs_work)
}
// NOTE `delete` deletes only files, but leaves empty dirs around.
// more important: this affects performance of future `git-backup pull` runs a *LOT*
//
// reason is: `git pull` first checks local refs, and for doing so it
// recurses into all directories, even empty ones.
//
// https://lab.nexedi.com/lab.nexedi.com/lab.nexedi.com/issues/4
//
// So remove all dirs under backup_refs_work prefix in the end.
//
// TODO Revisit this when reworking fetch to be parallel. Reason is: in
// the process of pulling repositories, the more references we
// accumulate, the longer pull starts to be, so it becomes O(n^2).
//
// -> what to do is described nearby fetch/mkref call.
gitdir := xgit("rev-parse", "--git-dir")
err = os.RemoveAll(gitdir + "/" + backup_refs_work)
exc.Raiseif(err) // NOTE err is nil if path does not exist
// if we have working copy - update it
bare := xgit("rev-parse", "--is-bare-repository")
if bare != "true" {
// `git checkout-index -af` -- does not delete deleted files
// `git read-tree -v -u --reset HEAD~ HEAD` -- needs index matching
// original worktree to properly work, but we already have updated index
//
// so we get changes we committed as diff and apply to worktree
diff := xgit("diff", "--binary", HEAD, "HEAD", RunWith{raw: true})
if diff != "" {
diffstat := xgit("apply", "--stat", "--apply", "--binary", "--whitespace=nowarn",
RunWith{stdin: diff, raw: true})
infof("%s", diffstat)
}
}
// we are done - unlock
xgit("update-ref", "-d", backup_lock)
}
// fetch makes sure all objects from a repository are present in backup place.
......@@ -683,133 +682,133 @@ func cmd_pull_(gb *git.Repository, pullspecv []PullSpec) {
// Note: fetch does not create any local references - the references returned
// only describe state of references in fetched source repository.
//
// Arguments:
//
//   - repo is handed as-is to `git ls-remote` and `git fetch-pack`.
//   - alreadyHave is consulted via Contains: advertised references whose
//     sha1 is in the set are not requested from the remote.
//
// On success it returns:
//
//   - refv: all references advertised by repo (as returned by lsremote);
//   - fetchedv: the subset of refv that was not in alreadyHave and was thus
//     actually fetched (nil if there was nothing to fetch).
//
// On failure both slices are nil and err is annotated with "fetch <repo>"
// context.
func fetch(repo string, alreadyHave Sha1Set) (refv, fetchedv []Ref, err error) {
	defer xerr.Contextf(&err, "fetch %s", repo)

	// first check which references are advertised
	refv, err = lsremote(repo)
	if err != nil {
		return nil, nil, err
	}

	// check if we already have something
	var fetchv []Ref // references we need to actually fetch.
	for _, ref := range refv {
		if !alreadyHave.Contains(ref.sha1) {
			fetchv = append(fetchv, ref)
		}
	}

	// if there is nothing to fetch - we are done
	if len(fetchv) == 0 {
		return refv, fetchv, nil
	}

	// fetch by sha1 what we don't already have from advertised.
	//
	// even if refs would change after ls-remote but before here, we should be
	// getting exactly what was advertised.
	//
	// related link on the subject:
	// https://git.kernel.org/pub/scm/git/git.git/commit/?h=051e4005a3
	var argv []interface{}
	arg := func(v ...interface{}) { argv = append(argv, v...) }
	arg(
		// check objects for corruption as they are fetched
		"-c", "fetch.fsckObjects=true",
		"fetch-pack", "--thin",

		// force upload-pack to allow us asking any sha1 we want.
		// needed because advertised refs we got at lsremote time could have changed.
		"--upload-pack=git -c uploadpack.allowAnySHA1InWant=true"+
			// workarounds for git < 2.11.1, which does not have uploadpack.allowAnySHA1InWant:
			" -c uploadpack.allowTipSHA1InWant=true -c uploadpack.allowReachableSHA1InWant=true"+
			//
			" upload-pack",

		repo)
	for _, ref := range fetchv {
		arg(ref.sha1)
	}
	arg(RunWith{stderr: gitprogress()})

	gerr, _, _ := ggit(argv...)
	if gerr != nil {
		return nil, nil, gerr
	}

	// fetch-pack ran ok - now check that all fetched tips are indeed fully
	// connected and that we also have all referenced blob/tree objects. The
	// reason for this check is that source repository could send us a pack with
	// e.g. some objects missing and this way even if fetch-pack would report
	// success, chances could be we won't have all the objects we think we
	// fetched.
	//
	// when checking we assume that the roots we already have at all our
	// references are ok.
	//
	// related link on the subject:
	// https://git.kernel.org/pub/scm/git/git.git/commit/?h=6d4bb3833c
	argv = nil
	arg("rev-list", "--quiet", "--objects", "--not", "--all", "--not")
	for _, ref := range fetchv {
		arg(ref.sha1)
	}
	arg(RunWith{stderr: gitprogress()})

	gerr, _, _ = ggit(argv...)
	if gerr != nil {
		return nil, nil, fmt.Errorf("remote did not send all neccessary objects")
	}

	// fetched ok
	return refv, fetchv, nil
}
// lsremote lists all references advertised by repo.
//
// It runs `git ls-remote --refs <repo>` and parses every "<oid> <refname>"
// output line into a Ref. Reference names are returned with the leading
// "refs/" stripped; an advertised name that does not start with "refs/" is
// reported as an error. Any returned error is annotated with
// "lsremote <repo>" context.
func lsremote(repo string) (refv []Ref, err error) {
	defer xerr.Contextf(&err, "lsremote %s", repo)

	// NOTE --refs instructs to omit peeled refs like
	//
	//	c668db59ccc59e97ce81f769d9f4633e27ad3bdb refs/tags/v0.1
	//	4b6821f4a4e4c9648941120ccbab03982e33104f refs/tags/v0.1^{}   <--
	//
	// because fetch-pack errors on them:
	//
	//	https://public-inbox.org/git/20180610143231.7131-1-kirr@nexedi.com/
	//
	// we don't need to pull them anyway.
	gerr, stdout, _ := ggit("ls-remote", "--refs", repo)
	if gerr != nil {
		return nil, gerr
	}

	//	oid refname
	//	oid refname
	//	...
	for _, entry := range xstrings.SplitLines(stdout, "\n") {
		sha1, ref := Sha1{}, ""
		_, err := fmt.Sscanf(entry, "%s %s\n", &sha1, &ref)
		if err != nil {
			return nil, fmt.Errorf("strange output entry: %q", entry)
		}

		// Ref says its name goes without "refs/" prefix.
		if !strings.HasPrefix(ref, "refs/") {
			return nil, fmt.Errorf("non-refs/ reference: %q", ref)
		}
		ref = strings.TrimPrefix(ref, "refs/")

		refv = append(refv, Ref{ref, sha1})
	}

	return refv, nil
}
// -------- git-backup restore --------
func cmd_restore_usage() {
fmt.Fprint(os.Stderr,
fmt.Fprint(os.Stderr,
`git-backup restore <commit-ish> <prefix1>:<dir1> <prefix2>:<dir2> ...
Restore Git repositories & just files from backup prefix1 into dir1,
......@@ -820,61 +819,61 @@ Backup state to restore is taken from <commit-ish>.
}
// RestoreSpec is one parsed "<prefix>:<dir>" argument of `git-backup restore`:
// what is stored under prefix in the backup gets restored into dir.
type RestoreSpec struct {
	prefix, dir string
}
// cmd_restore is the command-line entry point for
//
//	git-backup restore <commit-ish> <prefix1>:<dir1> <prefix2>:<dir2> ...
//
// It parses argv, prints usage and exits with status 1 when fewer than two
// positional arguments are given or when a "<prefix>:<dir>" argument cannot
// be split on ":", and otherwise hands the parsed request to cmd_restore_.
func cmd_restore(gb *git.Repository, argv []string) {
	flags := flag.FlagSet{Usage: cmd_restore_usage}
	flags.Init("", flag.ExitOnError)
	flags.Parse(argv)

	argv = flags.Args()
	if len(argv) < 2 {
		cmd_restore_usage()
		os.Exit(1)
	}

	// first positional argument is the commit-ish to restore from
	HEAD := argv[0]

	restorespecv := []RestoreSpec{}
	for _, arg := range argv[1:] {
		prefix, dir, err := xstrings.Split2(arg, ":")
		if err != nil {
			fmt.Fprintf(os.Stderr, "E: invalid restorespec %q\n", arg)
			cmd_restore_usage()
			os.Exit(1)
		}

		restorespecv = append(restorespecv, RestoreSpec{prefix, dir})
	}

	cmd_restore_(gb, HEAD, restorespecv)
}
// reporef_split splits "<repo>.git/<ref>" into repository path and reference name.
//
//	kirr/wendelin.core.git/heads/master          -> kirr/wendelin.core.git, heads/master
//	tiwariayush/Discussion%20Forum%20.git/...    -> tiwariayush/Discussion Forum .git, ...
//
// The split happens at the first ".git/" found in reporef. The repository
// part is passed through path_refunescape before being returned.
//
// It raises (via exc) if reporef contains no ".git/" or if unescaping fails.
func reporef_split(reporef string) (repo, ref string) {
	dotgit := strings.Index(reporef, ".git/")
	if dotgit == -1 {
		exc.Raisef("E: %s is not a ref for a git repo", reporef)
	}

	// repo keeps the ".git" suffix (4 bytes); ref starts right after the "/"
	repo, ref = reporef[:dotgit+4], reporef[dotgit+4+1:]
	repo, err := path_refunescape(repo) // unescape repo name we originally escaped when making backup
	exc.Raiseif(err)
	return repo, ref
}
// sha1 value(s) for a ref in 'backup.refs'
type BackupRefSha1 struct {
	sha1  Sha1 // original sha1 this ref was pointing to in original repo
	sha1_ Sha1 // sha1 actually used to represent sha1's object in backup repo
	//            (for tag/tree/blob - they are converted to commits)
}
// BackupRef represents 1 reference entry in 'backup.refs' (repo prefix stripped)
type BackupRef struct {
	name string // reference name without "refs/" prefix
	BackupRefSha1
}
// {} refname -> sha1, sha1_
......@@ -882,17 +881,17 @@ type RefMap map[string]BackupRefSha1
// info about a repository from backup.refs
type BackupRepo struct {
	repopath string // full repo path with backup prefix
	refs     RefMap // references recorded for this repository
}
// all RefMap values as flat []BackupRef
//
// The result has one element per map entry; its order follows map iteration
// and is therefore unspecified - sort it if a stable order is needed.
func (m RefMap) Values() []BackupRef {
	ev := make([]BackupRef, 0, len(m))
	for ref, refsha1 := range m {
		ev = append(ev, BackupRef{ref, refsha1})
	}
	return ev
}
// for sorting []BackupRef by refname
......@@ -904,22 +903,22 @@ func (br ByRefname) Less(i, j int) bool { return strings.Compare(br[i].name, br[
// all sha1 heads RefMap points to, in sorted order
//
// Only the original sha1 of every entry is considered (not sha1_); the values
// are collected through a Sha1Set before being sorted with BySha1.
func (m RefMap) Sha1Heads() []Sha1 {
	hs := Sha1Set{}
	for _, refsha1 := range m {
		hs.Add(refsha1.sha1)
	}
	headv := hs.Elements()
	sort.Sort(BySha1(headv))
	return headv
}
// like Sha1Heads() but returns heads in text format delimited by "\n"
//
// Every head, including the last one, is followed by "\n"; an empty RefMap
// yields "".
func (m RefMap) Sha1HeadsStr() string {
	s := ""
	for _, sha1 := range m.Sha1Heads() {
		s += sha1.String() + "\n"
	}
	return s
}
// for sorting []BackupRepo by repopath
......@@ -931,282 +930,282 @@ func (br ByRepoPath) Less(i, j int) bool { return strings.Compare(br[i].repopath
// also for searching sorted []BackupRepo by repopath prefix
//
// Search returns the smallest index i for which br[i].repopath >= prefix, or
// len(br) if there is no such element. br must already be sorted by repopath.
func (br ByRepoPath) Search(prefix string) int {
	return sort.Search(len(br), func(i int) bool {
		return strings.Compare(br[i].repopath, prefix) >= 0
	})
}
// request to extract a pack
type PackExtractReq struct {
	refs     RefMap // extract pack with objects from this heads
	repopath string // into repository located here

	// for info only: request was generated restoring from under this backup prefix
	prefix string
}
// cmd_restore_ restores files and git repositories from backup state HEAD_
// according to restorespecv.
//
// HEAD_ is resolved with `git rev-parse --verify`, and '<HEAD>:backup.refs' is
// loaded as the index of backed-up repositories. The work is then split
// between goroutines:
//
//   - one "main" worker walks restorespecv in order: for every spec it creates
//     the destination dir (which must not already exist), restores plain
//     files from `git ls-tree`, prepares the skeleton of every *.git directory
//     seen, and queues a PackExtractReq for every repository whose path
//     starts with the spec prefix;
//   - njobs pack workers take the queued requests, extract a pack for the
//     repository with `git pack-objects` and verify the result (refs listing
//     and rev-list connectivity check).
//
// Errors raised inside the workers are collected; the first one cancels the
// remaining work, and all collected errors are re-raised together via exc.
func cmd_restore_(gb *git.Repository, HEAD_ string, restorespecv []RestoreSpec) {
	HEAD := xgitSha1("rev-parse", "--verify", HEAD_)

	// read backup refs index
	repotab, err := loadBackupRefs(fmt.Sprintf("%s:backup.refs", HEAD))
	exc.Raiseif(err)

	// flattened & sorted repotab
	// NOTE sorted - to process repos always in the same order & for searching
	repov := make([]*BackupRepo, 0, len(repotab))
	for _, repo := range repotab {
		repov = append(repov, repo)
	}
	sort.Sort(ByRepoPath(repov))

	// repotab no longer needed
	repotab = nil

	packxq := make(chan PackExtractReq, 2*njobs) // requests to extract packs
	errch := make(chan error)                    // errors from workers
	stopch := make(chan struct{})                // broadcasts restore has to be cancelled
	wg := sync.WaitGroup{}

	// main worker: walk over specified prefixes restoring files and
	// scheduling pack extraction requests from *.git -> packxq
	wg.Add(1)
	go func() {
		defer wg.Done()
		defer close(packxq)
		// raised err -> errch
		here := my.FuncName()
		defer exc.Catch(func(e *exc.Error) {
			errch <- exc.Addcallingcontext(here, e)
		})

	runloop:
		for _, __ := range restorespecv {
			prefix, dir := __.prefix, __.dir

			// ensure dir did not exist before restore run
			err := os.Mkdir(dir, 0777)
			exc.Raiseif(err)

			// files
			lstree := xgit("ls-tree", "--full-tree", "-r", "-z", "--", HEAD, prefix, RunWith{raw: true})
			repos_seen := StrSet{} // dirs of *.git seen while restoring files
			for _, __ := range xstrings.SplitLines(lstree, "\x00") {
				mode, type_, sha1, filename, err := parse_lstree_entry(__)
				// NOTE
				//  - `ls-tree -r` shows only leaf objects
				//  - git-backup repository does not have submodules and the like
				// -> type should be "blob" only
				if err != nil || type_ != "blob" {
					exc.Raisef("%s: invalid/unexpected ls-tree entry %q", HEAD, __)
				}

				filename = reprefix(prefix, dir, filename)
				infof("# file %s\t-> %s", prefix, filename)
				blob_to_file(gb, sha1, mode, filename)

				// make sure git will recognize *.git as repo:
				//   - it should have refs/{heads,tags}/ and objects/pack/ inside.
				//
				// NOTE doing it while restoring files, because a repo could be
				//   empty - without refs at all, and thus next "git packs restore"
				//   step will not be run for it.
				filedir := pathpkg.Dir(filename)
				if strings.HasSuffix(filedir, ".git") && !repos_seen.Contains(filedir) {
					infof("# repo %s\t-> %s", prefix, filedir)
					for _, __ := range []string{"refs/heads", "refs/tags", "objects/pack"} {
						err := os.MkdirAll(filedir+"/"+__, 0777)
						exc.Raiseif(err)
					}
					repos_seen.Add(filedir)
				}
			}

			// git packs
			for i := ByRepoPath(repov).Search(prefix); i < len(repov); i++ {
				repo := repov[i]
				if !strings.HasPrefix(repo.repopath, prefix) {
					break // repov is sorted - end of repositories with prefix
				}

				// make sure tag/tree/blob objects represented as commits are
				// present, before we generate pack for restored repo.
				// ( such objects could be lost e.g. after backup repo repack as they
				//   are not reachable from backup repo HEAD )
				for _, __ := range repo.refs {
					if __.sha1 != __.sha1_ {
						obj_recreate_from_commit(gb, __.sha1_)
					}
				}

				// queue the request, unless restore was cancelled meanwhile
				select {
				case packxq <- PackExtractReq{refs: repo.refs,
					repopath: reprefix(prefix, dir, repo.repopath),
					prefix:   prefix}:

				case <-stopch:
					break runloop
				}
			}
		}
	}()

	// pack workers: packxq -> extract packs
	for i := 0; i < njobs; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// raised err -> errch
			here := my.FuncName()
			defer exc.Catch(func(e *exc.Error) {
				errch <- exc.Addcallingcontext(here, e)
			})

		runloop:
			for {
				select {
				case <-stopch:
					break runloop

				case p, ok := <-packxq:
					if !ok {
						// packxq closed - main worker has nothing more to schedule
						break runloop
					}
					infof("# git  %s\t-> %s", p.prefix, p.repopath)

					// extract pack for that repo from big backup pack + decoded tags
					pack_argv := []string{
						"-c", "pack.threads=1", // occupy only 1 CPU + it packs better
						"pack-objects",
						"--revs", // include all objects referencable from input sha1 list
						"--reuse-object", "--reuse-delta", "--delta-base-offset",

						// use bitmap index from backup repo, if present (faster pack generation)
						// https://git.kernel.org/pub/scm/git/git.git/commit/?h=645c432d61
						"--use-bitmap-index",
					}
					if verbose <= 0 {
						pack_argv = append(pack_argv, "-q")
					}
					pack_argv = append(pack_argv, p.repopath+"/objects/pack/pack")

					xgit2(pack_argv, RunWith{stdin: p.refs.Sha1HeadsStr(), stderr: gitprogress()})

					// verify that extracted repo refs match backup.refs index after extraction
					x_ref_list := xgit("--git-dir="+p.repopath,
						"for-each-ref", "--format=%(objectname) %(refname)")
					repo_refs := p.refs.Values()
					sort.Sort(ByRefname(repo_refs))
					repo_ref_listv := make([]string, 0, len(repo_refs))
					for _, ref := range repo_refs {
						repo_ref_listv = append(repo_ref_listv, fmt.Sprintf("%s refs/%s", ref.sha1, ref.name))
					}
					repo_ref_list := strings.Join(repo_ref_listv, "\n")
					if x_ref_list != repo_ref_list {
						// TODO show refs diff, not 2 dumps
						exc.Raisef("E: extracted %s refs corrupt:\n\nwant:\n%s\n\nhave:\n%s",
							p.repopath, repo_ref_list, x_ref_list)
					}

					// check connectivity in recreated repository.
					//
					// This way we verify that extracted pack indeed contains all
					// objects for all refs in the repo.
					//
					// Compared to fsck we do not re-compute sha1 sum of objects which
					// is significantly faster.
					gerr, _, _ := ggit("--git-dir="+p.repopath,
						"rev-list", "--objects", "--stdin", "--quiet", RunWith{stdin: p.refs.Sha1HeadsStr()})
					if gerr != nil {
						fmt.Fprintln(os.Stderr, "E: Problem while checking connectivity of extracted repo:")
						exc.Raise(gerr)
					}

					// XXX disabled because it is slow
					// // NOTE progress goes to stderr, problems go to stdout
					// xgit("--git-dir=" + p.repopath, "fsck",
					//         # only check that traversal from refs is ok: this unpacks
					//         # commits and trees and verifies blob objects are there,
					//         # but do _not_ unpack blobs =fast.
					//         "--connectivity-only",
					//         RunWith{stdout: gitprogress(), stderr: gitprogress()})
				}
			}
		}()
	}

	// wait for workers to finish & collect/reraise their errors
	go func() {
		wg.Wait()
		close(errch)
	}()

	ev := xerr.Errorv{}
	for e := range errch {
		// tell everything to stop on first error
		if len(ev) == 0 {
			close(stopch)
		}
		ev = append(ev, e)
	}

	if len(ev) != 0 {
		exc.Raise(ev)
	}
}
// loadBackupRefs loads 'backup.refs' content from a git object.
//
// an example of object is e.g. "HEAD:backup.refs".
//
// The blob is read with `git cat-file blob <object>`. Every line must consist
// of 2 or 3 whitespace-separated fields:
//
//	sha1 prefix+refname [sha1_]
//
// where sha1_ defaults to sha1 when omitted. Entries are grouped by
// repository path (see reporef_split) into the returned map, keyed by that
// path.
//
// An error, annotated with "load backup.refs <object>" context, is returned
// if git fails, if an entry is malformed, or if the same ref is listed twice
// for one repository. NOTE reporef_split raises via exc instead of returning
// an error when an entry does not name a "*.git/" reference.
func loadBackupRefs(object string) (repotab map[string]*BackupRepo, err error) {
	defer xerr.Contextf(&err, "load backup.refs %q", object)

	gerr, backup_refs, _ := ggit("cat-file", "blob", object)
	if gerr != nil {
		return nil, gerr
	}

	repotab = make(map[string]*BackupRepo)
	for _, refentry := range xstrings.SplitLines(backup_refs, "\n") {
		// sha1 prefix+refname (sha1_)
		badentry := func() error { return fmt.Errorf("invalid entry: %q", refentry) }
		refentryv := strings.Fields(refentry)
		if !(2 <= len(refentryv) && len(refentryv) <= 3) {
			return nil, badentry()
		}
		sha1, err := Sha1Parse(refentryv[0])
		sha1_, err_ := sha1, err
		if len(refentryv) == 3 {
			sha1_, err_ = Sha1Parse(refentryv[2])
		}
		if err != nil || err_ != nil {
			return nil, badentry()
		}
		reporef := refentryv[1]
		repopath, ref := reporef_split(reporef)

		// first ref seen for this repository - create its entry
		repo := repotab[repopath]
		if repo == nil {
			repo = &BackupRepo{repopath, RefMap{}}
			repotab[repopath] = repo
		}

		if _, alreadyin := repo.refs[ref]; alreadyin {
			return nil, fmt.Errorf("duplicate ref %q", ref)
		}
		repo.refs[ref] = BackupRefSha1{sha1, sha1_}
	}

	return repotab, nil
}
// commands maps a git-backup subcommand name, as given on the command line,
// to the function implementing it.
var commands = map[string]func(*git.Repository, []string){
	"pull":    cmd_pull,
	"restore": cmd_restore,
}
func usage() {
fmt.Fprintf(os.Stderr,
fmt.Fprintf(os.Stderr,
`git-backup [options] <command>
pull pull git-repositories and files to backup
......@@ -1222,44 +1221,44 @@ func usage() {
}
// main parses global options (-v, -q, -j), looks up the requested subcommand
// in commands and runs it on the git repository in the current directory.
//
// Exit status is 1 when no command is given, when the command is unknown, or
// when an exc error is raised while opening the repository or running the
// command; in the latter case the error is printed to stderr, together with
// a stack trace if verbose > 2.
func main() {
	flag.Usage = usage
	quiet := 0
	flag.Var((*xflag.Count)(&verbose), "v", "verbosity level")
	flag.Var((*xflag.Count)(&quiet), "q", "decrease verbosity")
	flag.IntVar(&njobs, "j", njobs, "allow max N jobs to spawn")
	flag.Parse()
	verbose -= quiet
	argv := flag.Args()

	if len(argv) == 0 {
		usage()
		os.Exit(1)
	}

	cmd := commands[argv[0]]
	if cmd == nil {
		// terminate the message with a newline so it does not run into
		// whatever is printed next on the terminal
		fmt.Fprintf(os.Stderr, "E: unknown command %q\n", argv[0])
		os.Exit(1)
	}

	// catch Error and report info from it
	here := my.FuncName()
	defer exc.Catch(func(e *exc.Error) {
		e = exc.Addcallingcontext(here, e)
		fmt.Fprintln(os.Stderr, e)

		// also show traceback if debug
		if verbose > 2 {
			fmt.Fprint(os.Stderr, "\n")
			debug.PrintStack()
		}

		os.Exit(1)
	})

	// backup repository
	gb, err := git.OpenRepository(".")
	exc.Raiseif(err)

	cmd(gb, argv[1:])
}
......@@ -20,355 +20,355 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"syscall"
"testing"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/my"
"lab.nexedi.com/kirr/go123/xruntime"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"syscall"
"testing"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/my"
"lab.nexedi.com/kirr/go123/xruntime"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
)
// xgetcwd returns the current working directory.
//
// If the directory cannot be determined, the test t is aborted via t.Fatal.
func xgetcwd(t *testing.T) string {
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	return cwd
}
// xchdir changes the current working directory to dir.
//
// If changing the directory fails, the test t is aborted via t.Fatal.
func xchdir(t *testing.T, dir string) {
	err := os.Chdir(dir)
	if err != nil {
		t.Fatal(err)
	}
}
// XSha1 parses s with Sha1Parse and returns the result.
//
// It panics with the parse error if s cannot be parsed; it is meant for
// spelling known-good sha1 constants in tests.
func XSha1(s string) Sha1 {
	sha1, err := Sha1Parse(s)
	if err != nil {
		panic(err)
	}
	return sha1
}
// xgittype converts git type name s to git.ObjectType via gittype.
//
// It raises (via exc) if gittype does not recognize s.
func xgittype(s string) git.ObjectType {
	type_, ok := gittype(s)
	if !ok {
		exc.Raisef("unknown git type %q", s)
	}
	return type_
}
// verify end-to-end pull-restore
//
// The test works in a temporary directory: it creates a bare backup.git,
// pulls testdata repositories into it, checks the encoding of non-commit
// objects, restores the backup and compares the result with the original,
// and finally checks that pulling broken repositories is refused.
func TestPullRestore(t *testing.T) {
	// if something raises -> don't let testing panic - report it as proper error with context.
	here := my.FuncName()
	defer exc.Catch(func(e *exc.Error) {
		e = exc.Addcallingcontext(here, e)

		// add file:line for failing code inside testing function - so we have exact context to debug
		failedat := ""
		for _, f := range xruntime.Traceback(1) {
			if f.Function == here {
				failedat = fmt.Sprintf("%s:%d", filepath.Base(f.File), f.Line)
				break
			}
		}
		if failedat == "" {
			panic(fmt.Errorf("cannot lookup failedat for %s", here))
		}

		t.Errorf("%s: %v", failedat, e)
	})

	workdir, err := ioutil.TempDir("", "t-git-backup")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(workdir)

	mydir := xgetcwd(t)
	xchdir(t, workdir)
	defer xchdir(t, mydir)

	// -test.v -> verbosity of git-backup
	if testing.Verbose() {
		verbose = 1
	} else {
		verbose = 0
	}

	// init backup repository
	xgit("init", "--bare", "backup.git")
	xchdir(t, "backup.git")
	gb, err := git.OpenRepository(".")
	if err != nil {
		t.Fatal(err)
	}

	// pull from testdata
	my0 := mydir + "/testdata/0"
	cmd_pull(gb, []string{my0 + ":b0"}) // only empty repo in testdata/0

	my1 := mydir + "/testdata/1"
	cmd_pull(gb, []string{my1 + ":b1"})

	// verify tag/tree/blob encoding is 1) consistent and 2) always the same.
	// we need it be always the same so different git-backup versions can
	// interoperate with each other.
	var noncommitv = []struct {
		sha1  Sha1 // original
		sha1_ Sha1 // encoded
		istag bool // is original object a tag object
	}{
		{XSha1("f735011c9fcece41219729a33f7876cd8791f659"), XSha1("4f2486e99ff9744751e0756b155e57bb24c453dd"), true},  // tag-to-commit
		{XSha1("7124713e403925bc772cd252b0dec099f3ced9c5"), XSha1("6b3beabee3e0704fa3269558deab01e9d5d7764e"), true},  // tag-to-tag
		{XSha1("11e67095628aa17b03436850e690faea3006c25d"), XSha1("89ad5fbeb9d3f0c7bc6366855a09484819289911"), true},  // tag-to-blob
		{XSha1("ba899e5639273a6fa4d50d684af8db1ae070351e"), XSha1("68ad6a7c31042e53201e47aee6096ed081b6fdb9"), true},  // tag-to-tree
		{XSha1("61882eb85774ed4401681d800bb9c638031375e2"), XSha1("761f55bcdf119ced3fcf23b69fdc169cbb5fc143"), false}, // ref-to-tree
		{XSha1("7a3343f584218e973165d943d7c0af47a52ca477"), XSha1("366f3598d662909e2537481852e42b775c7eb837"), false}, // ref-to-blob
	}

	for _, nc := range noncommitv {
		// encoded object should be already present
		_, err := ReadObject2(gb, nc.sha1_)
		if err != nil {
			t.Fatalf("encode %s should give %s but expected encoded object not found: %s", nc.sha1, nc.sha1_, err)
		}

		// decoding encoded object should give original sha1, if it was tag
		sha1 := obj_recreate_from_commit(gb, nc.sha1_)
		if nc.istag && sha1 != nc.sha1 {
			t.Fatalf("decode %s -> %s ; want %s", nc.sha1_, sha1, nc.sha1)
		}

		// encoding original object should give sha1_
		obj_type := xgit("cat-file", "-t", nc.sha1)
		sha1_ := obj_represent_as_commit(gb, nc.sha1, xgittype(obj_type))
		if sha1_ != nc.sha1_ {
			// report the object that was encoded (nc.sha1), not the decode result
			t.Fatalf("encode %s -> %s ; want %s", nc.sha1, sha1_, nc.sha1_)
		}
	}

	// checks / cleanups after cmd_pull
	afterPull := func() {
		// verify no garbage is left under refs/backup/
		dentryv, err := ioutil.ReadDir("refs/backup/")
		if err != nil && !os.IsNotExist(err) {
			t.Fatal(err)
		}
		if len(dentryv) != 0 {
			namev := []string{}
			for _, fi := range dentryv {
				namev = append(namev, fi.Name())
			}
			t.Fatalf("refs/backup/ not empty after pull: %v", namev)
		}

		// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
		xgit("prune")

		// verify backup repo is all ok
		xgit("fsck")

		// verify that just pulled tag objects are now gone after pruning -
		// - they become not directly git-present. The only possibility to
		// get them back is via recreating from encoded commit objects.
		for _, nc := range noncommitv {
			if !nc.istag {
				continue
			}
			gerr, _, _ := ggit("cat-file", "-p", nc.sha1)
			if gerr == nil {
				t.Fatalf("tag %s still present in backup.git after git-prune", nc.sha1)
			}
		}

		// reopen backup repository - to avoid having stale cache with present
		// objects we deleted above with `git prune`
		gb, err = git.OpenRepository(".")
		if err != nil {
			t.Fatal(err)
		}
	}

	afterPull()

	// pull again - it should be noop
	h1 := xgitSha1("rev-parse", "HEAD")
	cmd_pull(gb, []string{my1 + ":b1"})
	afterPull()
	h2 := xgitSha1("rev-parse", "HEAD")
	if h1 == h2 {
		t.Fatal("pull: second run did not ajusted HEAD")
	}
	δ12 := xgit("diff", h1, h2)
	if δ12 != "" {
		t.Fatalf("pull: second run was not noop: δ:\n%s", δ12)
	}

	// restore backup
	work1 := workdir + "/1"
	cmd_restore(gb, []string{"HEAD", "b1:" + work1})

	// verify files restored to the same as original
	gerr, diff, _ := ggit("diff", "--no-index", "--raw", "--exit-code", my1, work1)
	// 0 - no diff, 1 - has diff, 2 - problem
	if gerr != nil && gerr.Sys().(syscall.WaitStatus).ExitStatus() > 1 {
		t.Fatal(gerr)
	}
	gitObjectsRe := regexp.MustCompile(`\.git/objects/`)
	for _, diffline := range strings.Split(diff, "\n") {
		// empty diff output splits into a single "" entry - nothing to check there
		if diffline == "" {
			continue
		}
		// :srcmode dstmode srcsha1 dstsha1 status\tpath
		_, path, err := xstrings.HeadTail(diffline, "\t")
		if err != nil {
			t.Fatalf("restorecheck: cannot parse diff line %q", diffline)
		}
		// git objects can be represented differently (we check them later)
		if gitObjectsRe.FindString(path) != "" {
			continue
		}
		t.Fatal("restorecheck: unexpected diff:", diffline)
	}

	// verify git objects restored to the same as original
	err = filepath.Walk(my1, func(path string, info os.FileInfo, err error) error {
		// any error -> stop
		if err != nil {
			return err
		}

		// non *.git/ -- not interesting
		if !(info.IsDir() && strings.HasSuffix(path, ".git")) {
			return nil
		}

		// found git repo - check refs & objects in original and restored are exactly the same,
		var R = [2]struct{ path, reflist, revlist string }{
			{path: path},                       // original
			{path: reprefix(my1, work1, path)}, // restored
		}

		for i := range R {
			// NOTE work on R[i] itself: `for _, repo := range R` would fill in
			// a per-iteration copy and leave R[i].reflist/.revlist empty, which
			// would make the comparisons below always pass.
			repo := &R[i]

			// fsck just in case
			xgit("--git-dir="+repo.path, "fsck")
			// NOTE for-each-ref sorts output by refname
			repo.reflist = xgit("--git-dir="+repo.path, "for-each-ref")
			// NOTE rev-list emits objects in reverse chronological order,
			//      starting from refs roots which are also ordered by refname
			repo.revlist = xgit("--git-dir="+repo.path, "rev-list", "--all", "--objects")
		}

		if R[0].reflist != R[1].reflist {
			t.Fatalf("restorecheck: %q restored with different reflist (in %q)", R[0].path, R[1].path)
		}

		if R[0].revlist != R[1].revlist {
			t.Fatalf("restorecheck: %q restored with differrent objects (in %q)", R[0].path, R[1].path)
		}

		// .git verified - no need to recurse
		return filepath.SkipDir
	})

	if err != nil {
		t.Fatal(err)
	}

	// now try to pull corrupt repo - pull should refuse if transferred pack contains bad objects
	my2 := mydir + "/testdata/2"
	func() {
		defer exc.Catch(func(e *exc.Error) {
			// it ok - pull should raise

			// git-backup leaves backup repo locked on error
			xgit("update-ref", "-d", "refs/backup.locked")
		})

		cmd_pull(gb, []string{my2 + ":b2"})
		t.Fatal("pull corrupt.git: did not complain")
	}()

	// now try to pull repo where `git pack-objects` misbehaves
	my3 := mydir + "/testdata/3"
	checkIncompletePack := func(kind, errExpect string) {
		defer exc.Catch(func(e *exc.Error) {
			estr := e.Error()
			bad := ""
			badf := func(format string, argv ...interface{}) {
				bad += fmt.Sprintf(format+"\n", argv...)
			}

			if !strings.Contains(estr, errExpect) {
				badf("- no %q", errExpect)
			}

			if bad != "" {
				t.Fatalf("pull incomplete-send-pack.git/%s: complained, but error is wrong:\n%s\nerror: %s", kind, bad, estr)
			}

			// git-backup leaves backup repo locked on error
			xgit("update-ref", "-d", "refs/backup.locked")
		})

		// for incomplete-send-pack.git to indeed send incomplete pack, its git
		// config has to be activated via tweaked $HOME.
		home, ok := os.LookupEnv("HOME")
		defer func() {
			// restore $HOME to what it was before the tweak
			if ok {
				err = os.Setenv("HOME", home)
			} else {
				err = os.Unsetenv("HOME")
			}
			exc.Raiseif(err)
		}()
		err = os.Setenv("HOME", my3+"/incomplete-send-pack.git/"+kind)
		exc.Raiseif(err)

		cmd_pull(gb, []string{my3 + ":b3"})
		t.Fatalf("pull incomplete-send-pack.git/%s: did not complain", kind)
	}

	// missing blob: should be caught by git itself, because unpack-objects
	// performs full reachability checks of fetched tips.
	checkIncompletePack("x-missing-blob", "fatal: unpack-objects")

	// missing commit: remote sends a pack that is closed under reachability,
	// but it has objects starting from only parent of requested tip. This way
	// e.g. commit at tip itself is not sent and the fact that it is missing in
	// the pack is not caught by fetch-pack. git-backup has to detect the
	// problem itself.
	checkIncompletePack("x-commit-send-parent", "remote did not send all neccessary objects")

	// pulling incomplete-send-pack.git without pack-objects hook must succeed:
	// without $HOME tweaks full and complete pack is sent.
	cmd_pull(gb, []string{my3 + ":b3"})
}
// TestRepoRefSplit verifies that reporef_split splits an encoded
// "repo/ref" path into repository and reference parts, for each entry of
// the table below.
func TestRepoRefSplit(t *testing.T) {
	var tests = []struct{ reporef, repo, ref string }{
		{"kirr/wendelin.core.git/heads/master", "kirr/wendelin.core.git", "heads/master"},
		{"kirr/erp5.git/backup/x/master+erp5-data-notebook", "kirr/erp5.git", "backup/x/master+erp5-data-notebook"},
		{"tiwariayush/Discussion%20Forum%20.git/...", "tiwariayush/Discussion Forum .git", "..."},
		{"tiwariayush/Discussion%20Forum+.git/...", "tiwariayush/Discussion Forum+.git", "..."},
		{"tiwariayush/Discussion%2BForum+.git/...", "tiwariayush/Discussion+Forum+.git", "..."},
	}

	for _, tt := range tests {
		repo, ref := reporef_split(tt.reporef)
		if repo != tt.repo || ref != tt.ref {
			t.Errorf("reporef_split(%q) -> %q %q ; want %q %q", tt.reporef, repo, ref, tt.repo, tt.ref)
		}
	}
}
......@@ -21,142 +21,142 @@ package main
// Git-backup | Run git subprocess
import (
"bytes"
"fmt"
"os"
"os/exec"
"strings"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
"bytes"
"fmt"
"os"
"os/exec"
"strings"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
)
// how/whether to redirect stdio of spawned process
type StdioRedirect int

const (
	PIPE         StdioRedirect = iota // connect stdio channel via PIPE to parent (default value)
	DontRedirect                      // no redirection: spawned process uses parent's own channel
)
// RunWith describes how a git subprocess should be run.
//
// The zero value means: no stdin data, stdout and stderr captured via PIPE,
// output stripped, environment left untouched.
type RunWith struct {
	stdin  string            // data to feed to subprocess stdin ("" -> stdin is not set up)
	stdout StdioRedirect     // PIPE | DontRedirect
	stderr StdioRedirect     // PIPE | DontRedirect
	raw    bool              // !raw -> stdout, stderr are stripped
	env    map[string]string // !nil -> subprocess environment setup from env
}
// run `git *argv` -> error, stdout, stderr
//
// ctx.stdin, if non-empty, is fed to the subprocess on its stdin.
// For a channel in PIPE mode the output is captured and returned; for a
// channel in DontRedirect mode the subprocess writes to our own
// stdout/stderr and the corresponding returned string is empty.
// If ctx.env is not nil, the subprocess environment consists of exactly
// the entries of ctx.env.
// Unless ctx.raw is set, surrounding whitespace is trimmed from the
// returned stdout and stderr.
//
// The returned error is whatever exec.Cmd.Run returned.
// It panics if ctx.stdout or ctx.stderr hold an unknown redirect mode.
func _git(argv []string, ctx RunWith) (err error, stdout, stderr string) {
	debugf("git %s", strings.Join(argv, " "))

	cmd := exec.Command("git", argv...)
	stdoutBuf := bytes.Buffer{}
	stderrBuf := bytes.Buffer{}

	if ctx.stdin != "" {
		cmd.Stdin = strings.NewReader(ctx.stdin)
	}

	switch ctx.stdout {
	case PIPE:
		cmd.Stdout = &stdoutBuf
	case DontRedirect:
		cmd.Stdout = os.Stdout
	default:
		panic("git: stdout redirect mode invalid")
	}

	switch ctx.stderr {
	case PIPE:
		cmd.Stderr = &stderrBuf
	case DontRedirect:
		cmd.Stderr = os.Stderr
	default:
		panic("git: stderr redirect mode invalid")
	}

	if ctx.env != nil {
		env := []string{}
		for k, v := range ctx.env {
			env = append(env, k+"="+v)
		}
		cmd.Env = env
	}

	err = cmd.Run()
	stdout = mem.String(stdoutBuf.Bytes())
	stderr = mem.String(stderrBuf.Bytes())

	if !ctx.raw {
		// prettify stdout (e.g. so that 'sha1\n' becomes 'sha1' and can be used directly
		stdout = strings.TrimSpace(stdout)
		stderr = strings.TrimSpace(stderr)
	}

	return err, stdout, stderr
}
// error a git command returned
//
// It combines the context of the invocation with the exit error reported
// by os/exec.
type GitError struct {
	GitErrContext
	*exec.ExitError
}

// GitErrContext records what was run and what came back, so that an error
// message can show the whole git invocation.
type GitErrContext struct {
	argv   []string // git arguments (without leading "git")
	stdin  string   // what was fed to git on stdin
	stdout string   // what git printed to stdout
	stderr string   // what git printed to stderr
}

// Error returns the invocation context; if git printed nothing to stderr,
// "(failed)" is appended so the message still says that the command failed.
func (e *GitError) Error() string {
	msg := e.GitErrContext.Error()
	if e.stderr == "" {
		msg += "(failed)\n"
	}
	return msg
}

// Error formats the invocation in shell-like form:
//
//	git <argv...> </dev/null        (no stdin)
//	git <argv...> <<EOF             (with stdin)
//	<stdin>
//	EOF
//
// followed by git's stderr. The result always ends with a newline.
func (e *GitErrContext) Error() string {
	msg := "git " + strings.Join(e.argv, " ")
	if e.stdin == "" {
		msg += " </dev/null\n"
	} else {
		msg += " <<EOF\n" + e.stdin
		// make sure EOF marker starts on its own line
		if !strings.HasSuffix(msg, "\n") {
			msg += "\n"
		}
		msg += "EOF\n"
	}

	msg += e.stderr
	if !strings.HasSuffix(msg, "\n") {
		msg += "\n"
	}
	return msg
}
// argv -> []string, ctx    (for passing argv + RunWith handy - see ggit() for details)
//
// Every string argument is appended to argvs as is; a RunWith argument
// becomes ctx; any other value is converted with fmt.Sprint and appended.
// It panics if more than one RunWith is passed.
func _gitargv(argv ...interface{}) (argvs []string, ctx RunWith) {
	ctx_seen := false

	for _, arg := range argv {
		switch arg := arg.(type) {
		case string:
			argvs = append(argvs, arg)
		case RunWith:
			if ctx_seen {
				panic("git: multiple RunWith contexts")
			}
			ctx, ctx_seen = arg, true
		default:
			argvs = append(argvs, fmt.Sprint(arg))
		}
	}

	return argvs, ctx
}
// run `git *argv` -> err, stdout, stderr
......@@ -167,59 +167,59 @@ func _gitargv(argv ...interface{}) (argvs []string, ctx RunWith) {
//
// NOTE err is concrete *GitError, not error
func ggit(argv ...interface{}) (err *GitError, stdout, stderr string) {
	// split arguments into plain git argv + optional RunWith, then run
	return ggit2(_gitargv(argv...))
}
// ggit2 is ggit with arguments already split into argv and RunWith context.
//
// If git ran and exited with an error status, that is returned as *GitError
// carrying the invocation context (err is nil on success). Any other
// failure reported by _git (error that is not *exec.ExitError) is raised as
// an exception.
func ggit2(argv []string, ctx RunWith) (err *GitError, stdout, stderr string) {
	e, stdout, stderr := _git(argv, ctx)
	eexec, _ := e.(*exec.ExitError)
	if e != nil && eexec == nil {
		// include the underlying error in the message
		exc.Raisef("git %s : %s", strings.Join(argv, " "), e)
	}
	if eexec != nil {
		err = &GitError{GitErrContext{argv, ctx.stdin, stdout, stderr}, eexec}
	}
	return err, stdout, stderr
}
// run `git *argv` -> stdout
// on error - raise exception
//
// Arguments are interpreted by _gitargv: strings are git arguments and an
// optional RunWith tunes how git is run.
func xgit(argv ...interface{}) string {
	return xgit2(_gitargv(argv...))
}
// xgit2 is xgit with arguments already split into argv and RunWith context.
//
// It returns git's stdout and raises the *GitError if git failed.
func xgit2(argv []string, ctx RunWith) string {
	gerr, stdout, _ := ggit2(argv, ctx)
	if gerr != nil {
		exc.Raise(gerr)
	}
	return stdout
}
// like xgit(), but automatically parse stdout to Sha1
func xgitSha1(argv ...interface{}) Sha1 {
	return xgit2Sha1(_gitargv(argv...))
}
// error when git output is not valid sha1
type GitSha1Error struct {
	GitErrContext
}

// Error returns the invocation context followed by a note showing the
// stdout that could not be parsed as sha1.
func (e *GitSha1Error) Error() string {
	msg := e.GitErrContext.Error()
	msg += fmt.Sprintf("expected valid sha1 (got %q)\n", e.stdout)
	return msg
}
// xgit2Sha1 is xgitSha1 with arguments already split into argv and RunWith context.
//
// It raises the *GitError if git failed, and *GitSha1Error if git's stdout
// is not accepted by Sha1Parse.
func xgit2Sha1(argv []string, ctx RunWith) Sha1 {
	gerr, stdout, stderr := ggit2(argv, ctx)
	if gerr != nil {
		exc.Raise(gerr)
	}
	sha1, err := Sha1Parse(stdout)
	if err != nil {
		exc.Raise(&GitSha1Error{GitErrContext{argv, ctx.stdin, stdout, stderr}})
	}
	return sha1
}
......@@ -21,80 +21,80 @@ package main
// Git-backup | Git object: Blob Tree Commit Tag
import (
"errors"
"fmt"
"os"
"os/user"
"sync"
"time"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
"errors"
"fmt"
"os"
"os/user"
"sync"
"time"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/xstrings"
git "github.com/libgit2/git2go"
)
// read/write raw objects
// ReadObject reads object sha1 from repository g and checks its type.
//
// It returns *UnexpectedObjType if the object exists but its type is not
// objtype, and whatever ReadObject2 returned if reading failed.
func ReadObject(g *git.Repository, sha1 Sha1, objtype git.ObjectType) (*git.OdbObject, error) {
	obj, err := ReadObject2(g, sha1)
	if err != nil {
		return nil, err
	}
	if objtype != obj.Type() {
		return nil, &UnexpectedObjType{obj, objtype}
	}
	return obj, nil
}
// ReadObject2 reads object sha1 from the object database of repository g,
// without checking the object type.
//
// It returns *OdbNotReady if the object database cannot be obtained, and
// the error from odb.Read if the read itself failed.
func ReadObject2(g *git.Repository, sha1 Sha1) (*git.OdbObject, error) {
	odb, err := g.Odb()
	if err != nil {
		return nil, &OdbNotReady{g, err}
	}
	obj, err := odb.Read(sha1.AsOid())
	if err != nil {
		return nil, err
	}
	return obj, nil
}
// WriteObject writes content as an object of type objtype into the object
// database of repository g and returns the sha1 of the written object.
//
// It returns *OdbNotReady if the object database cannot be obtained, and
// the error from odb.Write if the write itself failed.
func WriteObject(g *git.Repository, content []byte, objtype git.ObjectType) (Sha1, error) {
	odb, err := g.Odb()
	if err != nil {
		return Sha1{}, &OdbNotReady{g, err}
	}
	oid, err := odb.Write(content, objtype)
	if err != nil {
		// err is e.g. "Failed to create temporary file '.../objects/tmp_object_git2_G045iN': Permission denied"
		return Sha1{}, err
	}
	return Sha1FromOid(oid), nil
}
// OdbNotReady is the error returned when the object database of a
// repository cannot be obtained.
type OdbNotReady struct {
	g   *git.Repository // repository whose odb was requested
	err error           // what g.Odb() returned
}

func (e *OdbNotReady) Error() string {
	return fmt.Sprintf("git(%q): odb not ready: %s", e.g.Path(), e.err)
}
// UnexpectedObjType is the error returned when an object was read ok, but
// its type is not the one the caller asked for.
type UnexpectedObjType struct {
	obj      *git.OdbObject // object that was read
	wantType git.ObjectType // type the caller expected
}

func (e *UnexpectedObjType) Error() string {
	return fmt.Sprintf("%s: type is %s (expected %s)", e.obj.Id(), e.obj.Type(), e.wantType)
}
// Tag holds what is parsed out of a tag object header: the type and sha1
// of the object the tag points to.
type Tag struct {
	tagged_type git.ObjectType
	tagged_sha1 Sha1
	// TODO msg
}
// load/parse Tag
......@@ -105,69 +105,69 @@ type Tag struct {
// - we need to have tag_parse() -- a way to parse object from a buffer
// (libgit2 does not provide such functionality at all)
func xload_tag(g *git.Repository, tag_sha1 Sha1) (tag *Tag, tag_obj *git.OdbObject) {
	// read raw object, requiring it to be a tag; raise if that fails
	tag_obj, err := ReadObject(g, tag_sha1, git.ObjectTag)
	exc.Raiseif(err)

	// parse tag header out of raw data; parse errors are raised as TagLoadError
	tag, err = tag_parse(mem.String(tag_obj.Data()))
	if err != nil {
		exc.Raise(&TagLoadError{tag_sha1, err})
	}
	return tag, tag_obj
}
// TagLoadError is raised by xload_tag when raw tag data cannot be parsed.
type TagLoadError struct {
	tag_sha1 Sha1  // tag that was being loaded
	err      error // what tag_parse returned
}

func (e *TagLoadError) Error() string {
	return fmt.Sprintf("tag %s: %s", e.tag_sha1, e.err)
}
// tag_parse parses the header of raw tag object data.
//
// The data has to start with
//
//	object <sha1>
//	type <type>
//
// It returns "invalid header" error if that prefix cannot be scanned, and
// an "invalid tagged type" error if gittype does not recognize <type>.
func tag_parse(tag_raw string) (*Tag, error) {
	t := Tag{}
	tagged_type := ""
	_, err := fmt.Sscanf(tag_raw, "object %s\ntype %s\n", &t.tagged_sha1, &tagged_type)
	if err != nil {
		return nil, errors.New("invalid header")
	}

	var ok bool
	t.tagged_type, ok = gittype(tagged_type)
	if !ok {
		return nil, fmt.Errorf("invalid tagged type %q", tagged_type)
	}
	return &t, nil
}
// parse lstree entry
//
// An entry has the form
//
//	<mode> SP <type> SP <object> TAB <file>
//
// where <mode> is octal and <file> can contain spaces.
// On malformed input all results are zero and err is *InvalidLstreeEntry.
func parse_lstree_entry(lsentry string) (mode uint32, type_ string, sha1 Sha1, filename string, err error) {
	// <mode> SP <type> SP <object> TAB <file>      # NOTE file can contain spaces
	head, filename, err1 := xstrings.HeadTail(lsentry, "\t")
	_, err2 := fmt.Sscanf(head, "%o %s %s\n", &mode, &type_, &sha1)

	if err1 != nil || err2 != nil {
		return 0, "", Sha1{}, "", &InvalidLstreeEntry{lsentry}
	}

	// parsed ok
	return mode, type_, sha1, filename, nil
}
// InvalidLstreeEntry is the error returned for an ls-tree entry that
// cannot be parsed.
type InvalidLstreeEntry struct {
	lsentry string // the offending entry, verbatim
}

func (e *InvalidLstreeEntry) Error() string {
	return fmt.Sprintf("invalid ls-tree entry %q", e.lsentry)
}
// create empty git tree -> tree sha1
//
// The tree is created with `git mktree` fed with empty input on first call;
// the resulting sha1 is remembered in tree_empty and reused afterwards.
// NOTE(review): tree_empty is accessed without synchronization - confirm
// that callers do not invoke mktree_empty concurrently.
var tree_empty Sha1

func mktree_empty() Sha1 {
	if tree_empty.IsNull() {
		tree_empty = xgitSha1("mktree", RunWith{stdin: ""})
	}
	return tree_empty
}
// commit tree
......@@ -178,84 +178,84 @@ func mktree_empty() Sha1 {
type AuthorInfo git.Signature

// String formats ai the way git stores author/committer in raw objects:
//
//	Name <Email> <unix-timestamp> <±HHMM>
func (ai *AuthorInfo) String() string {
	_, toffset := ai.When.Zone()

	// offset: .Zone() gives seconds east of UTC; git wants ±HHMM, i.e. hours
	// and minutes as separate 2-digit fields (UTC+2 is "+0200", not "+0120").
	sign := '+'
	if toffset < 0 {
		sign = '-'
		toffset = -toffset
	}
	minutes := toffset / 60

	return fmt.Sprintf("%s <%s> %d %c%02d%02d", ai.Name, ai.Email, ai.When.Unix(), sign, minutes/60, minutes%60)
}
var (
defaultIdent AuthorInfo // default ident without date
defaultIdentOnce sync.Once
defaultIdent AuthorInfo // default ident without date
defaultIdentOnce sync.Once
)
// getDefaultIdent returns the identity to use when author/committer
// information is not given explicitly.
//
// If libgit2 can provide the default signature for g, that is returned.
// Otherwise the identity is made from the current OS user and hostname
// ("?" is used for whatever cannot be determined); name and email are
// computed once and reused, while the time is set to now on every call.
func getDefaultIdent(g *git.Repository) AuthorInfo {
	sig, err := g.DefaultSignature()
	if err == nil {
		return AuthorInfo(*sig)
	}

	// libgit2 failed for some reason (e.g. user.name config not set). Let's cook ident ourselves
	defaultIdentOnce.Do(func() {
		var username, name string
		u, _ := user.Current()
		if u != nil {
			username = u.Username
			name = u.Name
		} else {
			username = "?"
			name = "?"
		}

		// XXX it is better to get hostname as fqdn
		hostname, _ := os.Hostname()
		if hostname == "" {
			hostname = "?"
		}

		defaultIdent.Name = name
		defaultIdent.Email = fmt.Sprintf("%s@%s", username, hostname)
	})

	ident := defaultIdent
	ident.When = time.Now()
	return ident
}
// mkref creates a git reference.
//
// it is an error if the reference already exists.
func mkref(g *git.Repository, name string, sha1 Sha1) error {
	// force=false: do not overwrite an existing reference; no reflog message
	_, err := g.References.Create(name, sha1.AsOid(), false, "")
	return err
}
// `git commit-tree` -> commit_sha1,   raise on error
//
// xcommit_tree2 builds the raw text of a commit object for tree with the
// given parents, message, author and committer, and writes it to the
// repository via WriteObject. Empty Name/Email and zero When in author or
// committer are filled in from getDefaultIdent.
func xcommit_tree2(g *git.Repository, tree Sha1, parents []Sha1, msg string, author AuthorInfo, committer AuthorInfo) Sha1 {
	ident := getDefaultIdent(g)

	// author and committer are received by value, so defaults are filled
	// into local copies only
	if author.Name == "" {
		author.Name = ident.Name
	}
	if author.Email == "" {
		author.Email = ident.Email
	}
	if author.When.IsZero() {
		author.When = ident.When
	}
	if committer.Name == "" {
		committer.Name = ident.Name
	}
	if committer.Email == "" {
		committer.Email = ident.Email
	}
	if committer.When.IsZero() {
		committer.When = ident.When
	}

	commit := fmt.Sprintf("tree %s\n", tree)
	for _, p := range parents {
		commit += fmt.Sprintf("parent %s\n", p)
	}
	// pass pointers: AuthorInfo.String has pointer receiver
	commit += fmt.Sprintf("author %s\n", &author)
	commit += fmt.Sprintf("committer %s\n", &committer)
	commit += fmt.Sprintf("\n%s", msg)

	sha1, err := WriteObject(g, mem.Bytes(commit), git.ObjectCommit)
	exc.Raiseif(err)

	return sha1
}
// xcommit_tree is xcommit_tree2 with empty author and committer, i.e. with
// both left for xcommit_tree2 to fill in from defaults.
func xcommit_tree(g *git.Repository, tree Sha1, parents []Sha1, msg string) Sha1 {
	return xcommit_tree2(g, tree, parents, msg, AuthorInfo{}, AuthorInfo{})
}
......@@ -263,14 +263,14 @@ func xcommit_tree(g *git.Repository, tree Sha1, parents []Sha1, msg string) Sha1
//
// Only valid concrete git types are converted successfully.
//
// For "commit", "tree", "blob" and "tag" the corresponding git.ObjectType
// and true are returned; for anything else - git.ObjectBad and false.
func gittype(typ string) (git.ObjectType, bool) {
	switch typ {
	case "commit":
		return git.ObjectCommit, true
	case "tree":
		return git.ObjectTree, true
	case "blob":
		return git.ObjectBlob, true
	case "tag":
		return git.ObjectTag, true
	}

	return git.ObjectBad, false
}
......@@ -282,12 +282,12 @@ func gittype(typ string) (git.ObjectType, bool) {
//
// gittypestr expects the type to be valid and concrete - else it panics.
//
// The result is one of "commit", "tree", "blob" or "tag".
func gittypestr(typ git.ObjectType) string {
	switch typ {
	case git.ObjectCommit:
		return "commit"
	case git.ObjectTree:
		return "tree"
	case git.ObjectBlob:
		return "blob"
	case git.ObjectTag:
		return "tag"
	}

	panic(fmt.Sprintf("git type %#v invalid", typ))
}
......@@ -25,44 +25,44 @@ package main
type Sha1Set map[Sha1]struct{}

// Add puts v into the set. Adding an already present element is a no-op.
func (s Sha1Set) Add(v Sha1) {
	s[v] = struct{}{}
}

// Contains reports whether v is in the set.
func (s Sha1Set) Contains(v Sha1) bool {
	_, ok := s[v]
	return ok
}

// all elements of set as slice
//
// The order of returned elements follows map iteration and is thus unspecified.
func (s Sha1Set) Elements() []Sha1 {
	ev := make([]Sha1, len(s))
	i := 0
	for e := range s {
		ev[i] = e
		i++
	}
	return ev
}
// Set<string>
type StrSet map[string]struct{}

// Add puts v into the set. Adding an already present element is a no-op.
func (s StrSet) Add(v string) {
	s[v] = struct{}{}
}

// Contains reports whether v is in the set.
func (s StrSet) Contains(v string) bool {
	_, ok := s[v]
	return ok
}

// all elements of set as slice
//
// The order of returned elements follows map iteration and is thus unspecified.
func (s StrSet) Elements() []string {
	ev := make([]string, len(s))
	i := 0
	for e := range s {
		ev[i] = e
		i++
	}
	return ev
}
......@@ -21,13 +21,13 @@ package main
// Git-backup | Sha1 type to work with SHA1 oids
import (
"bytes"
"encoding/hex"
"fmt"
"bytes"
"encoding/hex"
"fmt"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/mem"
git "github.com/libgit2/git2go"
git "github.com/libgit2/git2go"
)
const SHA1_RAWSIZE = 20
......@@ -39,51 +39,51 @@ const SHA1_RAWSIZE = 20
// - slice size = 24 bytes
// -> so it is reasonable to pass Sha1 not by reference
type Sha1 struct {
	sha1 [SHA1_RAWSIZE]byte // raw (binary, not hex) object id
}
// fmt.Stringer
var _ fmt.Stringer = Sha1{}

// String returns sha1 hex-encoded.
func (sha1 Sha1) String() string {
	return hex.EncodeToString(sha1.sha1[:])
}
// Sha1Parse decodes hex string sha1str into Sha1.
//
// An error is returned, together with zero Sha1, if sha1str does not decode
// to exactly SHA1_RAWSIZE bytes or is not valid hex.
func Sha1Parse(sha1str string) (Sha1, error) {
	sha1 := Sha1{}
	// check the length up front so that hex.Decode below never writes
	// past sha1.sha1
	if hex.DecodedLen(len(sha1str)) != SHA1_RAWSIZE {
		return Sha1{}, fmt.Errorf("sha1parse: %q invalid", sha1str)
	}
	_, err := hex.Decode(sha1.sha1[:], mem.Bytes(sha1str))
	if err != nil {
		return Sha1{}, fmt.Errorf("sha1parse: %q invalid: %s", sha1str, err)
	}

	return sha1, nil
}
// fmt.Scanner
var _ fmt.Scanner = (*Sha1)(nil)

// Scan implements fmt.Scanner so that Sha1 can be read with the fmt.Scan
// family of functions.
//
// Only %s and %v verbs are accepted. The next token (leading space skipped)
// is parsed with Sha1Parse and stored into *sha1.
func (sha1 *Sha1) Scan(s fmt.ScanState, ch rune) error {
	switch ch {
	case 's', 'v':
	default:
		return fmt.Errorf("Sha1.Scan: invalid verb %q", ch)
	}

	tok, err := s.Token(true, nil)
	if err != nil {
		return err
	}

	*sha1, err = Sha1Parse(mem.String(tok))
	return err
}
// check whether sha1 is null
//
// A Sha1 is null when it equals the zero value, i.e. all bytes are 0.
func (sha1 *Sha1) IsNull() bool {
	return *sha1 == Sha1{}
}
// for sorting by Sha1
......@@ -95,9 +95,9 @@ func (p BySha1) Less(i, j int) bool { return bytes.Compare(p[i].sha1[:], p[j].sh
// interoperability with git2go

// AsOid returns sha1 viewed as *git.Oid.
//
// No copy is made: the returned pointer refers to the bytes of sha1 itself.
func (sha1 *Sha1) AsOid() *git.Oid {
	return (*git.Oid)(&sha1.sha1)
}
// Sha1FromOid returns Sha1 holding a copy of *oid.
func Sha1FromOid(oid *git.Oid) Sha1 {
	return Sha1{*oid}
}
......@@ -21,161 +21,161 @@ package main
// Git-backup | Miscellaneous utilities
import (
"encoding/hex"
"fmt"
"os"
"strings"
"syscall"
"unicode"
"unicode/utf8"
"lab.nexedi.com/kirr/go123/mem"
"encoding/hex"
"fmt"
"os"
"strings"
"syscall"
"unicode"
"unicode/utf8"
"lab.nexedi.com/kirr/go123/mem"
)
// strip_prefix("/a/b", "/a/b/c/d/e") -> "c/d/e" (without leading /)
// path must start with prefix
//
// If path does not start with prefix, strip_prefix panics with an error value.
// All slashes that directly follow the prefix are removed from the result.
func strip_prefix(prefix, path string) string {
	if !strings.HasPrefix(path, prefix) {
		panic(fmt.Errorf("strip_prefix: %q has no prefix %q", path, prefix))
	}
	// drop the prefix and then every leading /
	return strings.TrimLeft(path[len(prefix):], "/")
}
// reprefix("/a", "/b", "/a/str") -> "/b/str"
// path must start with prefix_from
//
// The result is prefix_to, a single "/", and path with prefix_from stripped
// by strip_prefix (which panics if path does not start with prefix_from).
func reprefix(prefix_from, prefix_to, path string) string {
	path = strip_prefix(prefix_from, path)
	return fmt.Sprintf("%s/%s", prefix_to, path)
}
// like ioutil.WriteFile() but takes native mode/perm
//
// The file at path is opened write-only, created if missing and truncated,
// with perm passed to the open syscall as is. data is then written and the
// file closed. A failure to open is reported as *os.PathError; otherwise the
// first error among write and close is returned.
func writefile(path string, data []byte, perm uint32) error {
	fd, err := syscall.Open(path, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_TRUNC, perm)
	if err != nil {
		return &os.PathError{Op: "open", Path: path, Err: err}
	}
	f := os.NewFile(uintptr(fd), path)
	_, err = f.Write(data)
	// always close; report close error only if write succeeded
	err2 := f.Close()
	if err == nil {
		err = err2
	}
	return err
}
// escape path so that git is happy to use it as ref
// https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34
//
// path is processed one "/"-separated component at a time (see
// refescapeComponent); empty trailing components, i.e. trailing slashes, are
// dropped from the result.
func path_refescape(path string) string {
	components := strings.Split(path, "/")
	outv := make([]string, 0, len(components))
	for _, component := range components {
		outv = append(outv, refescapeComponent(component))
	}

	// strip trailing /
	for len(outv) > 0 && len(outv[len(outv)-1]) == 0 {
		outv = outv[:len(outv)-1]
	}
	return strings.Join(outv, "/")
}

// refescapeComponent escapes one path component (text between slashes).
func refescapeComponent(component string) string {
	out := make([]byte, 0, len(component))
	dots := 0 // number of seen consecutive dots

	// no ".." anywhere - we replace dots run to %2E%2E... with trailing "."
	// this way for single "." case we'll have it intact and avoid .. anywhere
	// also this way: trailing .git is always encoded as ".git"
	flushDots := func() {
		if dots == 0 {
			return
		}
		for i := 1; i < dots; i++ {
			out = append(out, "%2E"...)
		}
		out = append(out, '.')
		dots = 0
	}

	for len(component) > 0 {
		r, size := utf8.DecodeRuneInString(component)
		chunk := component[:size]
		component = component[size:]

		if r == '.' {
			dots++
			continue
		}
		flushDots()

		if shouldEscape(r) {
			chunk = escape(chunk)
		}
		out = append(out, chunk...)
	}

	// handle trailing dots
	flushDots()

	s := string(out)
	if len(s) == 0 {
		return s
	}

	// ^. not allowed
	if s[0] == '.' {
		s = escape(".") + s[1:]
	}

	// .lock$ not allowed
	if strings.HasSuffix(s, ".lock") {
		s = s[:len(s)-len(".lock")] + escape(".") + "lock"
	}
	return s
}

// shouldEscape reports whether rune r is always %-escaped by path_refescape:
// space and control characters, the special characters listed below, and
// bytes that are not valid UTF-8.
func shouldEscape(r rune) bool {
	if unicode.IsSpace(r) || unicode.IsControl(r) {
		return true
	}
	switch r {
	// NOTE RuneError is for always escaping non-valid UTF-8
	case ':', '?', '[', '\\', '^', '~', '*', '@', '%', utf8.RuneError:
		return true
	}
	return false
}

// escape returns s with every byte replaced by %XX, where XX is the byte
// value as two uppercase hex digits.
func escape(s string) string {
	const hexdigits = "0123456789ABCDEF"
	out := make([]byte, 0, 3*len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		out = append(out, '%', hexdigits[b>>4], hexdigits[b&0xf])
	}
	return string(out)
}
// unescape path encoded by path_refescape()
// decoding is permissive - any byte can be %-encoded, not only special cases
// XXX very suboptimal
//
// If a "%" is not followed by two hex digits, "" and EscapeError carrying
// the whole input are returned.
func path_refunescape(s string) (string, error) {
	l := len(s)
	out := make([]byte, 0, len(s))
	for i := 0; i < l; i++ {
		c := s[i]
		if c == '%' {
			// there must be two more bytes after %
			if i+2 >= l {
				return "", EscapeError(s)
			}
			b, err := hex.DecodeString(s[i+1 : i+3])
			if err != nil {
				return "", EscapeError(s)
			}

			c = b[0]
			i += 2 // skip the two hex digits just consumed
		}
		out = append(out, c)
	}
	return mem.String(out), nil
}
// EscapeError is the error returned by path_refunescape for input with
// invalid %-escapes. Its value is the offending input string.
type EscapeError string

// Error implements error. The offending input is included quoted.
func (e EscapeError) Error() string {
	return fmt.Sprintf("%q: invalid escape format", string(e))
}
......@@ -20,81 +20,81 @@
package main
import (
"strings"
"testing"
"strings"
"testing"
)
func TestPathEscapeUnescape(t *testing.T) {
type TestEntry struct { path string; escapedv []string }
te := func(path string, escaped ...string) TestEntry {
return TestEntry{path, escaped}
}
var tests = []TestEntry{
// path escaped non-canonical escapes
te("hello/world", "hello/world", "%68%65%6c%6c%6f%2f%77%6f%72%6c%64"),
te("hello/мир", "hello/мир"),
te("hello/ мир", "hello/%20мир"),
te("hel%lo/мир", "hel%25lo/мир"),
te(".hello/.world", "%2Ehello/%2Eworld"),
te("..hello/world.loc", "%2E.hello/world.loc"),
te("..hello/world.lock", "%2E.hello/world%2Elock"),
// leading /
te("/hello/world", "/hello/world"),
te("//hello///world", "//hello///world"),
// trailing /
te("/hello/world/", "/hello/world"),
te("/hello/world//", "/hello/world"),
type TestEntry struct { path string; escapedv []string }
te := func(path string, escaped ...string) TestEntry {
return TestEntry{path, escaped}
}
var tests = []TestEntry{
// path escaped non-canonical escapes
te("hello/world", "hello/world", "%68%65%6c%6c%6f%2f%77%6f%72%6c%64"),
te("hello/мир", "hello/мир"),
te("hello/ мир", "hello/%20мир"),
te("hel%lo/мир", "hel%25lo/мир"),
te(".hello/.world", "%2Ehello/%2Eworld"),
te("..hello/world.loc", "%2E.hello/world.loc"),
te("..hello/world.lock", "%2E.hello/world%2Elock"),
// leading /
te("/hello/world", "/hello/world"),
te("//hello///world", "//hello///world"),
// trailing /
te("/hello/world/", "/hello/world"),
te("/hello/world//", "/hello/world"),
// trailing ...
te("/hello/world.", "/hello/world."),
te("/hello/world..", "/hello/world%2E."),
te("/hello/world...", "/hello/world%2E%2E."),
te("/hello/world...git", "/hello/world%2E%2E.git"),
// trailing ...
te("/hello/world.", "/hello/world."),
te("/hello/world..", "/hello/world%2E."),
te("/hello/world...", "/hello/world%2E%2E."),
te("/hello/world...git", "/hello/world%2E%2E.git"),
// .. anywhere
te("/hello/./world", "/hello/%2E/world"),
te("/hello/.a/world", "/hello/%2Ea/world"),
te("/hello/a./world", "/hello/a./world"),
te("/hello/../world", "/hello/%2E./world"),
te("/hello/a..b/world", "/hello/a%2E.b/world"),
te("/hello/a.c.b/world", "/hello/a.c.b/world"),
te("/hello/a.c..b/world", "/hello/a.c%2E.b/world"),
// .. anywhere
te("/hello/./world", "/hello/%2E/world"),
te("/hello/.a/world", "/hello/%2Ea/world"),
te("/hello/a./world", "/hello/a./world"),
te("/hello/../world", "/hello/%2E./world"),
te("/hello/a..b/world", "/hello/a%2E.b/world"),
te("/hello/a.c.b/world", "/hello/a.c.b/world"),
te("/hello/a.c..b/world", "/hello/a.c%2E.b/world"),
// special & control characters
te("/hel lo/wor\tld/a:?[\\^~*@%b/\001\004\n\xc2\xa0", "/hel%20lo/wor%09ld/a%3A%3F%5B%5C%5E%7E%2A%40%25b/%01%04%0A%C2%A0"),
// special & control characters
te("/hel lo/wor\tld/a:?[\\^~*@%b/\001\004\n\xc2\xa0", "/hel%20lo/wor%09ld/a%3A%3F%5B%5C%5E%7E%2A%40%25b/%01%04%0A%C2%A0"),
// utf8 error
te("a\xc5z", "a%C5z"),
}
// utf8 error
te("a\xc5z", "a%C5z"),
}
for _, tt := range tests {
escaped := path_refescape(tt.path)
if escaped != tt.escapedv[0] {
t.Errorf("path_refescape(%q) -> %q ; want %q", tt.path, escaped, tt.escapedv[0])
}
// also check the decoding
pathok := strings.TrimRight(tt.path, "/")
for _, escaped := range tt.escapedv {
unescaped, err := path_refunescape(escaped)
if unescaped != pathok || err != nil {
t.Errorf("path_refunescape(%q) -> %q %v ; want %q nil", escaped, unescaped, err, tt.path)
}
}
}
for _, tt := range tests {
escaped := path_refescape(tt.path)
if escaped != tt.escapedv[0] {
t.Errorf("path_refescape(%q) -> %q ; want %q", tt.path, escaped, tt.escapedv[0])
}
// also check the decoding
pathok := strings.TrimRight(tt.path, "/")
for _, escaped := range tt.escapedv {
unescaped, err := path_refunescape(escaped)
if unescaped != pathok || err != nil {
t.Errorf("path_refunescape(%q) -> %q %v ; want %q nil", escaped, unescaped, err, tt.path)
}
}
}
}
func TestPathUnescapeErr(t *testing.T) {
var tests = []struct{ escaped string }{
{"%"},
{"%2"},
{"%2q"},
{"hell%2q/world"},
}
var tests = []struct{ escaped string }{
{"%"},
{"%2"},
{"%2q"},
{"hell%2q/world"},
}
for _, tt := range tests {
unescaped, err := path_refunescape(tt.escaped)
if err == nil || unescaped != "" {
t.Errorf("path_refunescape(%q) -> %q %v ; want \"\" err", tt.escaped, unescaped, err)
}
}
for _, tt := range tests {
unescaped, err := path_refunescape(tt.escaped)
if err == nil || unescaped != "" {
t.Errorf("path_refunescape(%q) -> %q %v ; want \"\" err", tt.escaped, unescaped, err)
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment