Commit 28986e0e authored by Kirill Smelkov's avatar Kirill Smelkov

Rewrite in Go

This is a more-or-less 1-to-1 port of git-backup to Go. There are a few things
we handle a bit differently:

- there is a separate type for Sha1
- conversion of repo paths to git references is now more robust wrt
  avoiding not-allowed in git constructs like ".." or ".lock"

  https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34

The rewrite happened because we need to optimize restore, and goroutines and
channels should make it convenient to e.g. parallelize parts of it.

I'm not very comfortable with how error handling is done: contrary to what
the canonical Go way seems to be, in a lot of places exceptions still look to
me like a better idea than plain error codes, though in many other places
plain error codes are better and make more sense.
There will probably be fewer exceptions over time, once the code becomes a
collaborating set of goroutines that communicate via
channels.

Still a lot of python habits on my side.

And as a bonus we now have end-to-end pull/restore tests...
parent a6cfe210
git-backup
......@@ -50,12 +50,12 @@ Backup workflow is:
$ git pull ...
Please see `git-backup`__ source with technical overview on how it works.
Please see `git-backup.go`__ source with technical overview on how it works.
We also provide convenience program to pull/restore backup data for a GitLab
instance into/from git-backup managed repository. See `contrib/gitlab-backup`__
for details.
__ git-backup
__ git-backup.go
__ contrib/gitlab-backup
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Exception-style errors
package main
import (
"fmt"
"runtime"
"strings"
)
// Error is the error type raised by raise(arg).
//
// An Error is one element of a singly linked chain: arg is the payload of
// this element and link points to the next element (nil at the end).
type Error struct {
	arg  interface{}
	link *Error // chain of linked Error(s) - see e.g. errcontext()
}

// Error renders the whole chain, starting from the receiver, as the chain
// elements joined with ": ".
//
// A Frame argument is shown as its function name with the _errorpkgdot prefix
// trimmed; any other argument is formatted with fmt.Sprint.
func (e *Error) Error() string {
	var parts []string
	for cur := e; cur != nil; cur = cur.link {
		// TODO(go1.7) -> runtime.Frame (see xtraceback())
		switch arg := cur.arg.(type) {
		case Frame:
			// XXX -> better prettyfunc
			parts = append(parts, strings.TrimPrefix(arg.Name(), _errorpkgdot))
		default:
			parts = append(parts, fmt.Sprint(cur.arg))
		}
	}
	return strings.Join(parts, ": ")
}
// aserror turns any value into *Error.
//
// A value that already is *Error is returned as is; any other value becomes
// the argument of a newly created Error without a link.
func aserror(v interface{}) *Error {
	switch e := v.(type) {
	case *Error:
		return e
	default:
		return &Error{arg: v}
	}
}
// raise raises arg to the upper level: it panics with arg converted to *Error
// by aserror().
func raise(arg interface{}) {
	e := aserror(arg)
	panic(e)
}
// raisef raises a string formatted fmt.Sprintf-style.
func raisef(format string, a ...interface{}) {
	msg := fmt.Sprintf(format, a...)
	raise(msg)
}
// raiseif raises err if it is not nil.
//
// NOTE err can be != nil even if it holds a typed nil object:
//
//	var obj *T
//	err = obj
//	err != nil	// is true
//
// such an err is raised too; only the interface value itself is checked here.
func raiseif(err error) {
	if err == nil {
		return
	}
	raise(err)
}
// _errcatch classifies a value obtained from recover().
//
//   - nil (no panic)      -> nil is returned
//   - non-nil *Error      -> that Error is returned
//   - anything else       -> the value is re-panicked unchanged
func _errcatch(r interface{}) *Error {
	if r == nil {
		return nil
	}
	if e, ok := r.(*Error); ok && e != nil {
		return e
	}
	panic(r)
}
// errcatch catches a raised Error and calls f(e) with it.
// f is not called when nothing was raised.
//
// Must be called under defer: recover() is invoked right here, so errcatch
// itself has to be the deferred function.
func errcatch(f func(e *Error)) {
	if e := _errcatch(recover()); e != nil {
		f(e)
	}
}
// erronunwind lets the caller be notified while error unwinding is happening.
// On unwinding f(e) is called with the raised Error, and the Error it returns
// is re-raised. f is not called when nothing was raised.
//
// see also: errcontext()
// must be called under defer
func erronunwind(f func(e *Error) *Error) {
	// errcatch(...) cannot be reused here:
	// recover() works only in first-level called functions
	caught := _errcatch(recover())
	if caught != nil {
		panic(f(caught))
	}
}
// errcontext provides error context that is added automatically on unwinding.
//
// f is called only if error unwinding is happening; its result is put in
// front of the raised error as "prefix" context and the result is re-raised.
//
// must be called under defer
func errcontext(f func() interface{}) {
	caught := _errcatch(recover())
	if caught != nil {
		withctx := erraddcontext(caught, f())
		panic(withctx)
	}
}
// erraddcontext returns a new Error that has arg as "prefix" context and
// links to e as the rest of the chain.
func erraddcontext(e *Error, arg interface{}) *Error {
	return &Error{arg: arg, link: e}
}
// _myfuncname returns the fully qualified name of the function located nskip
// frames up the call stack. nskip is passed to runtime.Callers unchanged, so
// 0 identifies runtime.Callers itself, 1 - _myfuncname, 2 - the caller of
// _myfuncname, and so on.
//
// "" is returned if there is no such frame.
//
// The program counter is resolved via runtime.CallersFrames and not via
// runtime.FuncForPC: runtime.Callers yields return program counters, and
// looking them up directly accounts neither for inlining nor for return-PC
// adjustment, so a wrong function could be reported.
func _myfuncname(nskip int) string {
	var pcv [1]uintptr
	if n := runtime.Callers(nskip, pcv[:]); n == 0 {
		return ""
	}
	frame, _ := runtime.CallersFrames(pcv[:]).Next()
	return frame.Function
}
// myfuncname returns the name of the currently running function, i.e. of the
// caller of myfuncname().
// The name is fully qualified: package/name.function(.x)
//
// The skip of 3 passed below is counted as for runtime.Callers:
// runtime.Callers itself, _myfuncname and myfuncname are skipped. Because of
// that _myfuncname has to be called directly from here.
func myfuncname() string {
return _myfuncname(3)
}
// mypkgname returns the package of the currently running function, i.e. of
// the caller of mypkgname().
// The package is fully qualified: package/name
//
// "" is returned if the name of the calling function could not be determined.
// It panics if that name has no package/function delimiter.
func mypkgname() string {
	// 3 = skip runtime.Callers, _myfuncname and mypkgname itself
	myfunc := _myfuncname(3)
	if myfunc == "" {
		return ""
	}

	// NOTE only dots in the last element of the package path are escaped by
	// go as %2e. Dots in earlier elements stay as they are:
	//
	//	lab.nexedi.com/kirr/git-backup/package%2ename.Function
	//
	// this way the delimiter between package and function is the first dot
	// after the last slash, not the first dot of the whole name.
	lastslash := strings.LastIndexByte(myfunc, '/')
	if lastslash == -1 {
		lastslash = 0
	}
	idot := strings.IndexByte(myfunc[lastslash:], '.')
	if idot == -1 {
		panic(fmt.Errorf("funcname %q is not fully qualified", myfunc))
	}
	return myfunc[:lastslash+idot]
}
// Frame describes one entry of a calling traceback: the function plus the
// program counter as it was reported by runtime.Callers.
//
// TODO(go1.7) goes away in favour of runtime.Frame
type Frame struct {
	*runtime.Func
	pc uintptr
}

// xtraceback returns the current calling traceback as []Frame, innermost
// frame first.
//
// nskip meaning: the same as in runtime.Callers(), counted relative to
// xtraceback - 0 starts with xtraceback itself, 1 with its caller, etc.
//
// TODO(go1.7) []Frame -> []runtime.Frame  (via runtime.CallersFrames)
func xtraceback(nskip int) []Frame {
	// all callers: keep doubling the buffer until runtime.Callers no longer
	// fills it completely - only then we know nothing was truncated.
	pcv := []uintptr{0}
	for {
		pcv = make([]uintptr, 2*len(pcv))
		n := runtime.Callers(nskip+1, pcv) // +1 to skip xtraceback's own extra level
		if n < len(pcv) {
			pcv = pcv[:n]
			break
		}
	}

	// pcv -> frames
	framev := make([]Frame, 0, len(pcv))
	for _, pc := range pcv {
		// pc is a return program counter. Look the function up by pc-1, which
		// is inside the call instruction - the same adjustment
		// runtime.CallersFrames does - so that the frame is attributed
		// correctly in presence of inlining and calls that end a function.
		framev = append(framev, Frame{runtime.FuncForPC(pc - 1), pc})
	}
	return framev
}
// names derived from the package this file is compiled into; set once by init
var (
	_errorpkgname string // package name under which error.go lives
	_errorpkgdot  string // errorpkg.
	_errorraise   string // errorpkg.raise
)

// init computes the package-derived names above.
// mypkgname() has to be called directly from here, because it reports the
// package of its immediate caller.
func init() {
	pkg := mypkgname()
	_errorpkgname, _errorpkgdot, _errorraise = pkg, pkg+".", pkg+".raise"
}
// erraddcallingcontext adds calling context to an error.
//
// The names of the calling functions are added as error context, from the
// function that called raise*() up to, but not including, topfunc.
// With topfunc == "" the context is not limited from the top.
// If there is no raise*() frame in the current traceback e is returned as is.
//
// see also: erraddcontext()
func erraddcallingcontext(topfunc string, e *Error) *Error {
	// 2 = skip xtraceback and erraddcallingcontext themselves
	framev := xtraceback(2)

	// do not show anything after raise*(): find the first such frame
	iraise := -1
	for i, f := range framev {
		if strings.HasPrefix(f.Name(), _errorraise) {
			iraise = i
			break
		}
	}
	if iraise == -1 {
		return e
	}

	for _, f := range framev[iraise+1:] {
		name := f.Name()

		// do not go beyond topfunc
		if topfunc != "" && name == topfunc {
			break
		}

		// skip intermediates
		if strings.HasSuffix(name, "_") { // XXX -> better skipfunc
			continue
		}

		e = &Error{f, e}
	}
	return e
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
package main
import (
"strings"
"testing"
)
// do_raise1 raises the integer 1 via raise().
// The tests in this file use it as the innermost raising function; its name
// is part of the message TestErrAddCallingContext expects.
func do_raise1() {
raise(1)
}
func TestErrRaiseCatch(t *testing.T) {
defer errcatch(func(e *Error) {
if !(e.arg == 1 && e.link == nil) {
t.Fatalf("error caught but unexpected: %#v ; want {1, nil}", e)
}
})
do_raise1()
t.Fatal("error not caught")
}
// verifyErrChain verifies that the error chain starting at e has exactly the
// .arg values listed in argv, in order.
func verifyErrChain(t *testing.T, e *Error, argv ...interface{}) {
	n := 0
	for cur := e; cur != nil; cur = cur.link {
		if n >= len(argv) {
			t.Fatal("too long error chain")
		}
		if want := argv[n]; cur.arg != want {
			t.Fatalf("error caught but unexpected %vth arg: %v ; want %v", n, cur.arg, want)
		}
		n++
	}

	if n < len(argv) {
		t.Fatal("too small error chain")
	}
}
func do_onunwind1(t *testing.T) {
defer erronunwind(func(e *Error) *Error {
t.Fatal("on unwind called without raise")
return nil
})
}
func do_onunwind2() {
defer erronunwind(func(e *Error) *Error {
return &Error{2, e}
})
do_raise1()
}
func TestErrOnUnwind(t *testing.T) {
defer errcatch(func(e *Error) {
verifyErrChain(t, e, 2, 1)
})
do_onunwind1(t)
do_onunwind2()
t.Fatal("error not caught")
}
// do_context1 installs an error context provider and returns without
// raising; the provider must not be called.
func do_context1(t *testing.T) {
	provider := func() interface{} {
		t.Fatal("on context called without raise")
		return nil
	}
	defer errcontext(provider)
}
// do_context2 raises 1 under an error context provider that returns 3.
func do_context2() {
	provider := func() interface{} {
		return 3
	}
	defer errcontext(provider)

	do_raise1()
}
func TestErrContext(t *testing.T) {
defer errcatch(func(e *Error) {
verifyErrChain(t, e, 3, 1)
})
do_context1(t)
do_context2()
t.Fatal("error not caught")
}
// TestMyFuncName verifies that myfuncname() reports the function it is
// called from.
func TestMyFuncName(t *testing.T) {
	// go test changes full package name (putting filesystem of the tree into it)
	// thus we check only for suffix
	const wantsuffix = ".TestMyFuncName"

	if got := myfuncname(); !strings.HasSuffix(got, wantsuffix) {
		t.Errorf("myfuncname() -> %v ; want *%v", got, wantsuffix)
	}
}
// do_raise11 calls do_raise1, so that an error raised there has two levels of
// calling context; both names are part of the message
// TestErrAddCallingContext expects.
func do_raise11() {
do_raise1()
}
// TestErrAddCallingContext verifies that erraddcallingcontext() prefixes a
// caught error with the names of the functions between this test and raise().
func TestErrAddCallingContext(t *testing.T) {
	const want = "do_raise11: do_raise1: 1"

	// the context must stop at this very function
	topfunc := myfuncname()

	check := func(e *Error) {
		got := erraddcallingcontext(topfunc, e).Error()
		if got != want {
			t.Fatalf("err + calling context: %q ; want %q", got, want)
		}
	}
	defer errcatch(check)

	do_raise11()

	// reached only if do_raise11 did not raise
	t.Fatal("error not caught")
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2015-2016 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
"""Git-backup - Backup set of Git repositories & just files; efficiently
This program backups files and set of bare Git repositories into one Git repository.
Files are copied to blobs and then added to tree under certain place, and for
Git repositories, all reachable objects are pulled in with maintaining index
which remembers reference -> sha1 for every pulled repositories.
After objects from backuped Git repositories are pulled in, we create new
commit which references tree with changed backup index and files, and also has
all head objects from pulled-in repositories in its parents(*). This way backup
has history and all pulled objects become reachable from single head commit in
backup repository. In particular that means that the whole state of backup can
be described with only single sha1, and that backup repository itself could be
synchronized via standard git pull/push, be repacked, etc.
Restoration process is the opposite - from a particular backup state, files are
extracted at a proper place, and for Git repositories a pack with all objects
reachable from that repository heads is prepared and extracted from backup
repository object database.
This approach allows to leverage Git's good ability for object contents
deduplication and packing, especially for cases when there are many hosted
repositories which are forks of each other with relatively minor changes in
between each other and over time, and mostly common base. In author experience
the size of backup is dramatically smaller compared to straightforward "let's
tar it all" approach.
Data for all backuped files and repositories can be accessed if one has access
to backup repository, so either they all should be in the same security domain,
or extra care has to be taken to protect access to backup repository.
File permissions are not managed with strict details due to inherent
nature of Git. This aspect can be improved with e.g. etckeeper-like
(http://etckeeper.branchable.com/) approach if needed.
Please see README.rst with user-level overview on how to use git-backup.
NOTE the idea of pulling all refs together is similar to git-namespaces
http://git-scm.com/docs/gitnamespaces
(*) Tag objects are handled specially - because in a lot of places Git insists and
assumes commit parents can only be commit objects. We encode tag objects in
specially-crafted commit object on pull, and decode back on backup restore.
We do likewise if a ref points to tree or blob, which is valid in Git.
"""
import os
import sys
from os.path import join as pathjoin, exists as pathexists, dirname, islink
from time import strftime
from subprocess import Popen, PIPE
from stat import S_ISLNK
from getopt import getopt, GetoptError
from errno import EEXIST
from urllib import quote, unquote
# verbosity level of the output
#   0 - silent
#   1 - info
#   2 - progress of long-running operations
#   3 - debug
verbose = 1

# print msg if verbosity is at info level (1) or above
def info(msg):
    if verbose <= 0:
        return
    print(msg)
# what to pass to git subprocess as stdout/stderr:
#   None - no redirection (used when verbose > 1, so git progress is visible)
#   PIPE - output goes to us
def gitprogress():
    if verbose > 1:
        return None
    return PIPE
# print msg if verbosity is at debug level (3) or above
def debug(msg):
    if verbose <= 2:
        return
    print(msg)
# run `git *argv` -> retcode, stdout, stderr
#
# **kw can be:
#   stdin=...               data to feed to git on its stdin
#   stdout,stderr=PIPE|None where git output goes (PIPE by default)
#   raw=True|False          if false (default) captured output is stripped
#   env=dict|None           environment for the subprocess
#
# any other keyword argument -> RuntimeError
def git(*argv, **kw):
    argv = ['git'] + list(argv)

    opt = {'stdin': None, 'stdout': PIPE, 'stderr': PIPE, 'raw': False, 'env': None}
    for name in ('stdin', 'stdout', 'stderr', 'raw', 'env'):
        if name in kw:
            opt[name] = kw.pop(name)
    if kw:
        raise RuntimeError('git: unsupported kwargs: %s' % kw)

    debug(' '.join(argv))

    p = Popen(argv, stdin=PIPE, stdout=opt['stdout'], stderr=opt['stderr'], env=opt['env'])
    out, err = p.communicate(opt['stdin'])

    if not opt['raw']:
        # prettify output (e.g. so that 'sha1\n' becomes 'sha1' and can be used directly)
        # out/err are None when the corresponding stream was not captured
        out = None if out is None else out.strip()
        err = None if err is None else err.strip()

    return p.returncode, out, err
# run `git *argv` -> stdout
# on error (non-zero git exit code) - raise RuntimeError carrying the command
# line and git's stderr, or '(failed)' if there is no stderr text
def xgit(*argv, **kw):
    retcode, stdout, stderr = git(*argv, **kw)
    if not retcode:
        return stdout
    raise RuntimeError('git ' + ' '.join(argv) + '\n' + (stderr or '(failed)'))
# like os.walk() but raise on error + entries are emitted in sorted order;
# symlinks (both to files and directories) are always yielded in filenames
#
# dirnames is yielded as the very list os.walk uses, so the caller can prune
# it in place to control recursion.
def reraise(e):
    raise e

def xwalk(top):
    for dirpath, dirnames, filenames in os.walk(top, onerror=reraise):
        # os.walk yields symlinks to dirs in dirnames - move them to files
        realdirs = []
        for name in dirnames:
            if islink(pathjoin(dirpath, name)):
                filenames.append(name)
            else:
                realdirs.append(name)
        dirnames[:] = realdirs      # in place - os.walk keeps using this list

        dirnames.sort()
        filenames.sort()
        yield dirpath, dirnames, filenames
# strip_prefix('/a/b', '/a/b/c/d/e') -> 'c/d/e' (without leading /)
#
# path must start with prefix (asserted); all '/' that follow the prefix are
# dropped.
def strip_prefix(prefix, path):
    assert path.startswith(prefix)
    tail = path[len(prefix):]
    return tail.lstrip('/')
# reprefix('/a', '/b', '/a/str') -> '/b/str'
#
# i.e. replace leading prefix_from in path with prefix_to
def reprefix(prefix_from, prefix_to, path):
    return '%s/%s' % (prefix_to, strip_prefix(prefix_from, path))
# remove all occurrences of item in seq
#
# seq is a list and is modified in place (callers rely on that, e.g. to prune
# the dirnames list while walking a tree).
# The list is filtered in one pass instead of repeating `in` + `remove`, which
# rescans the list from the start for every removed element.
def removeall(seq, item):
    # `x is item or x == item` is the same match rule `in`/`remove` use
    seq[:] = [x for x in seq if not (x is item or x == item)]
# mkdir -p
#
# create directory path together with all missing parents.
# It is not an error if path already exists as a directory. If path exists but
# is not a directory, OSError is raised (like `mkdir -p` does) instead of
# being silently ignored.
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is ok only when what exists is really a directory
        if e.errno != EEXIST or not os.path.isdir(path):
            raise
# file -> blob_sha1, mode
#
# store the content of file at path as a blob in the git object database.
# For a symlink the blob content is the link target.
# mode is st_mode as returned by lstat for path.
def file_to_blob(path):
    st = os.lstat(path)
    if S_ISLNK(st.st_mode):
        # git hash-object does not handle symlinks
        source, stdin = ['--stdin'], os.readlink(path)
    else:
        source, stdin = ['--', path], None

    blob_sha1 = xgit('hash-object', '-w', '--no-filters', *source, stdin=stdin)
    return blob_sha1, st.st_mode
# blob_sha1, mode -> file
#
# extract blob into file at path, creating parent directories as needed.
# If mode says "symlink", a symlink pointing to blob content is created;
# otherwise a regular file is written and mode is applied to it.
def blob_to_file(blob_sha1, mode, path):
    content = xgit('cat-file', 'blob', blob_sha1, raw=True)
    mkdir_p(dirname(path))

    if S_ISLNK(mode):
        os.symlink(content, path)
        return

    with open(path, 'wb') as f:
        f.write(content)
        os.fchmod(f.fileno(), mode)
# create empty git tree -> tree sha1
#
# the sha1 is computed via `git mktree` on first use and remembered in
# tree_empty for later calls.
tree_empty = None
def mktree_empty():
    global tree_empty
    if tree_empty:
        return tree_empty
    tree_empty = xgit('mktree', stdin='')
    return tree_empty
# `git commit-tree` -> commit_sha1, raise on error
#
# tree      - sha1 of the tree to commit
# parents   - [] of parent sha1
# msg       - commit message (fed to git via stdin)
# author, committer - dicts with optional 'name', 'email' and 'date'; entries
#             that are present and non-empty are passed to git through the
#             corresponding GIT_AUTHOR_* / GIT_COMMITTER_* environment variables
def xcommit_tree(tree, parents, msg, author={}, committer={}):
    argv = ['commit-tree', tree]
    for parent in parents:
        argv.extend(['-p', parent])

    env = os.environ.copy()
    envmap = (
        (author,    'name',  'GIT_AUTHOR_NAME'),
        (author,    'email', 'GIT_AUTHOR_EMAIL'),
        (author,    'date',  'GIT_AUTHOR_DATE'),
        (committer, 'name',  'GIT_COMMITTER_NAME'),
        (committer, 'email', 'GIT_COMMITTER_EMAIL'),
        (committer, 'date',  'GIT_COMMITTER_DATE'),
    )
    for person, field, var in envmap:
        value = person.get(field)
        if value:
            env[var] = value

    return xgit(*argv, stdin=msg, env=env)
# parse tag object -> tagged_type, tagged_sha1
#
# tag_raw is raw tag object content; only its first two lines are looked at:
#
#   object 7ecec52d42a964c4a8e9f6ca41bb0b5ce00049b4
#   type commit
#
# RuntimeError is raised if those lines do not start with 'object' / 'type'.
def tag_info(tag_raw):
    lines = tag_raw.splitlines()
    object_line, type_line = lines[0], lines[1]

    key, tagged_sha1 = object_line.split()
    if key != 'object':
        raise RuntimeError("E: expected 'object' got %r" % key)

    key, tagged_type = type_line.split()
    if key != 'type':
        raise RuntimeError("E: expected 'type' got %r" % key)

    return tagged_type, tagged_sha1
# represent tag/tree/blob as specially crafted commit -> commit sha1
#
# The reason we do this is that we want refs/tag/* to be parents of synthetic
# backup commit, but git does not allow tag objects to be in commit parents.
# Also besides commit and tag, it is possible for a ref to point to a tree or blob.
#
# We always attach original tagged object to crafted commit in one way or
# another, so that on backup restore we only have to recreate original tag
# object and tagged object is kept there in repo thanks to it being reachable
# through created commit.
#
# obj_type, if not given, is queried from git. RuntimeError is raised if the
# object is not tag/tree/blob, or if a tag points to an object of unknown type.
def obj_represent_as_commit(sha1, obj_type=None):
    if obj_type is None:
        obj_type = xgit('cat-file', '-t', sha1)
    if obj_type not in ('tag', 'tree', 'blob'):
        raise RuntimeError('E: cannot encode %s as commit' % obj_type)

    # first line in commit msg = object type
    obj_encoded = '%s\n' % obj_type

    # below the code layout is mainly for tag type, and we hook tree and blob
    # types handling into that layout
    if obj_type == 'tag':
        tag_raw = xgit('cat-file', 'tag', sha1, raw=True)
        tagged_type, tagged_sha1 = tag_info(tag_raw)
        obj_encoded += tag_raw
    else:
        # for tree/blob we only care that object stays reachable
        tagged_type, tagged_sha1 = obj_type, sha1

    # decide .tree and .parent of the encoding commit
    if tagged_type == 'commit':
        # Tag        ~>     Commit*
        #  |                 .msg:      Tag
        #  v                 .tree   -> ø
        # Commit             .parent -> Commit
        tree, parents = mktree_empty(), [tagged_sha1]

    elif tagged_type == 'tree':
        # Tag        ~>     Commit*
        #  |                 .msg:      Tag
        #  v                 .tree   -> Tree
        # Tree               .parent -> ø
        tree, parents = tagged_sha1, []

    elif tagged_type == 'blob':
        # Tag        ~>     Commit*
        #  |                 .msg:      Tag
        #  v                 .tree   -> Tree* "tagged" -> Blob
        # Blob               .parent -> ø
        tree = xgit('mktree', stdin='100644 blob %s\ttagged\n' % tagged_sha1)
        parents = []

    elif tagged_type == 'tag':
        # Tag₂       ~>     Commit₂*
        #  |                 .msg:      Tag₂
        #  v                 .tree   -> ø
        # Tag₁               .parent -> Commit₁*
        commit1 = obj_represent_as_commit(tagged_sha1, tagged_type)
        tree, parents = mktree_empty(), [commit1]

    else:
        raise RuntimeError('E: Unknown tagged type %r in %s' % (tagged_type, sha1))

    # all commits we do here - we do with fixed name/date, so transformation
    # tag->commit is stable wrt git environment and time change
    fixed = {'name': 'Git backup', 'email': 'git@backup.org', 'date': '@0 +0000'}
    return xcommit_tree(tree, parents, obj_encoded, author=fixed, committer=fixed)
# recreate tag/tree/blob from specially crafted commit
# (see obj_represent_as_commit() about how objects are originally translated into commit)
#
# RuntimeError is raised if the commit does not look like an encoded object,
# or if re-encoding the recreated tag does not give commit_sha1 back.
def obj_recreate_from_commit(commit_sha1):
    # extract .tree .parent[] and .msg
    #
    # unfortunately `git show --format=%B` adds newline and optionally wants to
    # reencode commit message and otherwise heavily rely on rev-list traversal
    # machinery -> so we decode commit by hand in a plumbing way.
    commit_raw = xgit('cat-file', 'commit', commit_sha1, raw=True)
    msg_start = commit_raw.index('\n\n')
    head, msg = commit_raw[:msg_start+1], commit_raw[msg_start+2:]

    tree = parent = None
    for line in head.splitlines():
        if line.startswith('tree '):
            if tree:
                raise RuntimeError('E: multiple tree in commit %s' % commit_sha1)
            _, tree = line.split()
        elif line.startswith('parent '):
            if parent:
                raise RuntimeError('E: encoded obj has >1 parents in commit %s' % commit_sha1)
            _, parent = line.split()

    if not tree:
        raise RuntimeError('E: no tree in commit %s' % commit_sha1)

    # first line of the message is the encoded object type, the rest is its raw content
    obj_type, _, obj_raw = msg.partition('\n')
    if obj_type not in ('tag', 'tree', 'blob'):
        raise RuntimeError('E: unexpected encoded object type %r in %s' % (obj_type, commit_sha1))

    # for tree/blob we do not need to do anything - that objects were reachable
    # from commit and are present in git db.
    if obj_type != 'tag':
        return

    # re-create tag object
    tag_sha1 = xgit('hash-object', '-t', 'tag', '-w', '--stdin', stdin=obj_raw)

    # the original tagged object should be already in repository, because we
    # always attach it to encoding commit one way or another,
    # except we need to recurse, if it was Tag₂->Tag₁
    tagged_type, _ = tag_info(obj_raw)
    if tagged_type == 'tag':
        assert parent is not None
        obj_recreate_from_commit(parent)

    # verify consistency via re-encoding tag again
    commit_sha1_ = obj_represent_as_commit(tag_sha1, 'tag')
    if commit_sha1_ != commit_sha1:
        raise RuntimeError('E: tag encoded by commit %s corrupt (reencoded as %s)' %
                           (commit_sha1, commit_sha1_))
# git-backup pull
#
# write usage of `git-backup pull` to file object f.
#
# f.write is used instead of the `print >>f` statement, so that the function
# behaves the same under Python 2 and Python 3 and is consistent with the
# print(...) calls used elsewhere in this file. The output is unchanged: the
# usage text followed by one more newline.
def cmd_pull_usage(f):
    usage = \
"""git-backup pull <dir1>:<prefix1> <dir2>:<prefix2> ...
Pull bare Git repositories & just files from dir1 into backup prefix1,
from dir2 into backup prefix2, etc...
"""
    f.write(usage + '\n')
# `git-backup pull` command line entry point
#
# argv is the list of command arguments: options (-h/--help) followed by one
# or more <dir>:<prefix> pullspecs. On -h/--help usage goes to stdout and the
# program exits with 0; on invalid invocation the error and usage go to
# stderr and the program exits with 1. Otherwise the parsed pullspecs are
# handed to cmd_pull_().
#
# All diagnostics go to stderr (the option parsing error used to go to
# stdout), and sys.exit is used instead of the `exit` helper that the site
# module provides for interactive use.
def cmd_pull(argv):
    try:
        optv, argv = getopt(argv, "h", ["help"])
    except GetoptError as e:
        sys.stderr.write('%s\n' % e)
        cmd_pull_usage(sys.stderr)
        sys.exit(1)

    for opt, arg in optv:
        if opt in ("-h", "--help"):
            cmd_pull_usage(sys.stdout)
            sys.exit(0)

    if not argv:
        cmd_pull_usage(sys.stderr)
        sys.exit(1)

    pullspecv = []  # [] of (dir,prefix)
    for spec in argv:
        try:
            # exactly one ':' is required
            dir_, prefix = spec.split(':')
        except ValueError:
            sys.stderr.write("E: invalid pullspec '%s'\n" % spec)
            cmd_pull_usage(sys.stderr)
            sys.exit(1)

        pullspecv.append( (dir_, prefix) )

    cmd_pull_(pullspecv)
# pull files and git repositories described by pullspecv into the backup
# repository and record the result as one new commit on HEAD.
#
# pullspecv is [] of (dir, prefix): content of dir is saved under prefix.
#
# Overview of the steps below:
#
#   1. take the refs/backup.locked lock;
#   2. make sure HEAD exists (create root commit if not);
#   3. for every (dir, prefix): clear prefix in the index, add plain files as
#      blobs to the index and fetch refs of every *.git directory into a
#      temporary refs/backup/<time>/ namespace;
#   4. write backup.refs index (ref -> sha1) and create a commit whose parents
#      are HEAD plus all fetched objects (non-commits are first converted via
#      obj_represent_as_commit());
#   5. delete the temporary refs, update the working copy if the repository is
#      not bare, and release the lock.
#
# NOTE(review): the lock is released only by the last statement; there is no
# try/finally, so an exception raised mid-way leaves refs/backup.locked in
# place - confirm whether that is intended.
def cmd_pull_(pullspecv):
# while pulling, we'll keep refs from all pulled repositories under temp
# unique work refs namespace.
backup_time = strftime('%Y%m%d-%H%M')
backup_refs_work = 'refs/backup/%s/' % backup_time # refs/backup/20150820-2109/
backup_lock = 'refs/backup.locked'
# make sure another `git-backup pull` is not running
# (the all-zero old value is passed so that update-ref fails if the lock ref
# already exists - presumably; verify against git-update-ref documentation)
xgit('update-ref', backup_lock, mktree_empty(), '00'*20)
# make sure there is root commit
st, _, _ = git('rev-parse', '--verify', 'HEAD')
if st:
info('# creating root commit')
# NOTE `git commit` does not work in bare repo - do commit by hand
commit = xcommit_tree(mktree_empty(), [], 'Initialize git-backup repository')
xgit('update-ref', '-m', 'git-backup pull init', 'HEAD', commit)
# walk over specified dirs, pulling objects from git and adding non-git-object files
for dir_, prefix in pullspecv:
# make sure index is empty for prefix (so that we start from clean
# prefix namespace and this way won't leave stale removed things)
xgit('rm', '--cached', '-r', '--ignore-unmatch', '--', prefix)
for dirpath, dirnames, filenames in xwalk(dir_):
# files -> add directly to index to commit later
for _ in filenames:
filepath = pathjoin(dirpath, _)
info('# file %s\t<- %s' % (prefix, filepath))
blob, mode = file_to_blob(filepath)
xgit('update-index', '--add', '--cacheinfo', '%o,%s,%s' % \
(mode, blob, reprefix(dir_, prefix, filepath)) )
# directories -> look for *.git and handle git object specially
for i, dirname in enumerate(dirnames):
# do not recurse into *.git/objects/ - we'll save them specially
# (entries are only marked with None here and removed after the loop,
# so that indices stay valid while iterating)
if dirpath.endswith('.git') and dirname == 'objects':
dirnames[i] = None
continue
# else we recurse, but handle *.git specially - via fetching objects from it
if not dirname.endswith('.git'):
continue
# git repo - let's pull all refs from it to our backup refs namespace
gitrepo = pathjoin(dirpath, dirname)
info('# git %s\t<- %s' % (prefix, gitrepo))
# NOTE --no-tags : do not try to autoextend commit -> covering tag
# NOTE fetch.fsckObjects=true : check objects for corruption as they are fetched
xgit('-c', 'fetch.fsckObjects=true',
'fetch', '--no-tags', gitrepo,
'refs/*:%s%s/*' % (backup_refs_work,
# NOTE repo name is quoted as it can contain spaces, and refs must not
quote(reprefix(dir_, prefix, gitrepo))),
# TODO do not show which ref we pulled - show only pack transfer progress
stderr=gitprogress())
# XXX do we want to do full fsck of source git repo on pull as well ?
# do not recurse into dirs so marked
removeall(dirnames, None)
# all refs from all found git repositories populated.
# now prepare manifest with ref -> sha1 and do a synthetic commit merging all that sha1
# (so they become all reachable from HEAD -> survive repack and be transferable on git pull)
#
# NOTE we handle tag/tree/blob objects specially - because these objects cannot
# be in commit parents, we convert them to specially-crafted commits and use them.
# The commits prepared contain full info how to restore original objects.
# backup.refs format:
#
# 1eeb0324 <prefix>/wendelin.core.git/heads/master
# 213a9243 <prefix>/wendelin.core.git/tags/v0.4 <213a9243-converted-to-commit>
# ...
#
# NOTE `git for-each-ref` sorts output by ref
# -> backup_refs is sorted and stable between runs
backup_refs_list = xgit('for-each-ref', backup_refs_work)
backup_refs = [] # backup.refs content
backup_refs_parents = set() # sha1 for commit parents, obtained from refs
noncommit_seen = {} # {} sha1 -> sha1_ (there are many duplicate tags)
for _ in backup_refs_list.splitlines():
sha1, type_, ref = _.split()
backup_refs_entry = '%s %s' % (sha1, strip_prefix(backup_refs_work, ref))
# represent tag/tree/blob as specially crafted commit, because we
# cannot use it as commit parent.
sha1_ = sha1
if type_ != 'commit':
#info('obj_as_commit %s %s\t%s' % (sha1, type_, ref)) XXX
sha1_ = noncommit_seen.get(sha1)
if sha1_ is None:
sha1_ = obj_represent_as_commit(sha1, type_)
noncommit_seen[sha1] = sha1_
backup_refs_entry += ' %s' % sha1_
backup_refs.append(backup_refs_entry)
if sha1_ not in backup_refs_parents: # several refs can refer to the same sha1
backup_refs_parents.add(sha1_)
backup_refs = '\n'.join(backup_refs)
backup_refs_parents = list(backup_refs_parents)
backup_refs_parents.sort() # so parents order is stable in between runs
# backup_refs -> blob
backup_refs_sha1 = xgit('hash-object', '-w', '--stdin', stdin=backup_refs)
# add backup_refs blob to index
xgit('update-index', '--add', '--cacheinfo', '100644,%s,backup.refs' % backup_refs_sha1)
# index is ready - prepare tree and commit
backup_tree_sha1 = xgit('write-tree')
HEAD = xgit('rev-parse', 'HEAD')
commit_sha1 = xcommit_tree(backup_tree_sha1, [HEAD] + backup_refs_parents,
'Git-backup %s' % backup_time)
# old value HEAD is passed too, so the update is refused if HEAD has moved meanwhile
# (presumably; verify against git-update-ref documentation)
xgit('update-ref', '-m', 'git-backup pull', 'HEAD', commit_sha1, HEAD)
# remove no-longer needed backup refs & verify they don't stay
# FIXME `delete` deletes only files, but leaves empty dirs around.
# more important: this affects performance of future `git-backup pull` runs a *LOT*
#
# reason is: `git pull` first checks local refs, and for doing so it
# recurses into all directories, even empty ones.
#
# https://lab.nexedi.com/lab.nexedi.com/lab.nexedi.com/issues/4
#
# -> TODO also remove empty directories.
backup_refs_delete = ''
for _ in backup_refs_list.splitlines():
sha1, type_, ref = _.split()
backup_refs_delete += 'delete %s %s\n' % (ref, sha1)
xgit('update-ref', '--stdin', stdin=backup_refs_delete)
_ = xgit('for-each-ref', backup_refs_work)
if _:
raise RuntimeError('Backup refs under %s not deleted properly' % backup_refs_work)
# if we have working copy - update it
bare = xgit('rev-parse', '--is-bare-repository')
bare = (bare == 'true')
if not bare:
# `git checkout-index -af` -- does not delete deleted files
# `git read-tree -v -u --reset HEAD~ HEAD` -- needs index matching
# original worktree to properly work, but we already have updated index
#
# so we get changes we committed as diff and apply to worktree
# (here HEAD variable is the commit before the pull, 'HEAD' - the new one)
diff = xgit('diff', '--binary', HEAD, 'HEAD', raw=True)
if diff:
diffstat = xgit('apply', '--stat', '--apply', '--binary', '--whitespace=nowarn',
stdin=diff, raw=True)
info(diffstat)
# we are done - unlock
xgit('update-ref', '-d', backup_lock)
# git-backup restore
#
# write usage of `git-backup restore` to file object f.
#
# f.write is used instead of the `print >>f` statement, so that the function
# behaves the same under Python 2 and Python 3 and is consistent with the
# print(...) calls used elsewhere in this file. The output is unchanged: the
# usage text followed by one more newline.
def cmd_restore_usage(f):
    usage = \
"""git-backup restore <commit-ish> <prefix1>:<dir1> <prefix2>:<dir2> ...
Restore Git repositories & just files from backup prefix1 into dir1,
from backup prefix2 into dir2, etc...
Backup state to restore is taken from <commit-ish>.
"""
    f.write(usage + '\n')
# `git-backup restore` command line entry point
#
# argv is the list of command arguments: options (-h/--help) followed by
# <commit-ish> and one or more <prefix>:<dir> restorespecs. On -h/--help usage
# goes to stdout and the program exits with 0; on invalid invocation the error
# and usage go to stderr and the program exits with 1. Otherwise the parsed
# arguments are handed to cmd_restore_().
#
# All diagnostics go to stderr (the option parsing error used to go to
# stdout), and sys.exit is used instead of the `exit` helper that the site
# module provides for interactive use.
def cmd_restore(argv):
    try:
        optv, argv = getopt(argv, "h", ["help"])
    except GetoptError as e:
        sys.stderr.write('%s\n' % e)
        cmd_restore_usage(sys.stderr)
        sys.exit(1)

    for opt, arg in optv:
        if opt in ("-h", "--help"):
            cmd_restore_usage(sys.stdout)
            sys.exit(0)

    # <commit-ish> and at least one restorespec are required
    if len(argv) < 2:
        cmd_restore_usage(sys.stderr)
        sys.exit(1)

    HEAD = argv[0]

    restorespecv = []   # [] of (prefix,dir)
    for spec in argv[1:]:
        try:
            # exactly one ':' is required
            prefix, dir_ = spec.split(':')
        except ValueError:
            sys.stderr.write("E: invalid restorespec '%s'\n" % spec)
            cmd_restore_usage(sys.stderr)
            sys.exit(1)

        restorespecv.append( (prefix, dir_) )

    cmd_restore_(HEAD, restorespecv)
# ref -> path of the git repository the ref belongs to
#
# kirr/wendelin.core.git/heads/master   -> kirr/wendelin.core.git
# tiwariayush/Discussion%20Forum%20.git/... -> tiwariayush/Discussion Forum .git
#
# the repository is everything up to and including the first '.git' that is
# followed by '/'. RuntimeError is raised if ref has no '.git/' in it.
def ref_to_repo(ref):
    stem, dotgit, _ = ref.partition('.git/')
    if not dotgit:
        raise RuntimeError('E: %s is not a ref for a git repo' % ref)
    # unquote repo name we originally quoted when making backup
    return unquote(stem + '.git')
# sha1 value(s) for a ref in 'backup.refs'
#
#   .sha1   -- original sha1 this ref was pointing to in original repo
#   .sha1_  -- sha1 actually used to represent sha1's object in backup repo
#              (for tag/tree/blob - they are converted to commits)
class BackupRefSha1:
    def __init__(self, sha1, sha1_):
        self.sha1, self.sha1_ = sha1, sha1_
# Restore files and git repositories from backup state HEAD.
#
# HEAD         - commit-ish naming the backup state to restore from
# restorespecv - [] of (prefix, dir): what is under backup prefix is restored into dir
#
# For every (prefix, dir):
#
#   1. dir must not exist yet;
#   2. all blobs under prefix in HEAD tree are extracted as files;
#   3. for every repository which has refs under prefix in backup.refs, a pack
#      is generated into <repo>/objects/pack/ and the result is verified.
#
# RuntimeError is raised on inconsistencies.
def cmd_restore_(HEAD, restorespecv):
HEAD = xgit('rev-parse', '--verify', HEAD)
# read backup refs index
backup_refs = {} # prefix+ref -> BackupRefSha1
backup_refs_ = xgit('cat-file', 'blob', '%s:backup.refs' % HEAD)
for _ in backup_refs_.splitlines():
# sha1 prefix+refname (sha1_)
refentryv = _.split()
if len(refentryv) not in (2,3):
raise RuntimeError('E: invalid entry in backup.refs: %s' % _)
sha1, ref = refentryv[:2]
# no 3rd column -> the object is stored in backup as is
sha1_ = refentryv[2] if len(refentryv) == 3 else sha1
if ref in backup_refs:
raise RuntimeError('E: duplicate ref %s in backup.refs' % ref)
backup_refs[ref] = BackupRefSha1(sha1, sha1_)
# walk over specified prefixes restoring files and packs in *.git
for prefix, dir_ in restorespecv:
if pathexists(dir_):
raise RuntimeError('E: %s already exists' % dir_)
# files
lstree = xgit('ls-tree', '--full-tree', '-r', '-z', '--', HEAD, prefix, raw=True)
repos_seen = set() # dirs of *.git seen while restoring files
for _ in lstree.split('\0'):
if not _:
continue # last empty line after last \0
# NOTE maxsplit=3 because filename can contain spaces
mode, type_, sha1, filename = _.split(None, 3)
assert type_ == 'blob'
mode = int(mode, 8)
filename = reprefix(prefix, dir_, filename)
info('# file %s\t-> %s' % (prefix, filename))
blob_to_file(sha1, mode, filename)
# make sure git will recognize *.git as repo:
# - it should have refs/{heads,tags}/ and objects/pack/ inside.
#
# NOTE doing it while restoring files, because a repo could be
# empty - without refs at all, and thus next "git packs restore"
# step will not be run for it.
filedir = dirname(filename)
if filedir.endswith('.git') and not filedir in repos_seen:
info('# repo %s\t-> %s' % (prefix, filedir))
mkdir_p('%s/refs/heads' % filedir)
mkdir_p('%s/refs/tags' % filedir)
mkdir_p('%s/objects/pack' % filedir)
repos_seen.add(filedir)
# git packs
refs = set(_ for _ in backup_refs.keys() if _.startswith('%s/' % prefix))
repos = set(ref_to_repo(_) for _ in refs)
for repo in sorted(repos): # NOTE sorted - to process repos always in the same order
repopath = reprefix(prefix, dir_, repo)
info('# git %s\t-> %s' % (prefix, repopath))
# {} ref -> BackupRefSha1 for this repo
# NOTE(review): refs are selected by plain startswith(repo), with repo being
# already unquoted by ref_to_repo() - looks fragile for repo names which
# need quoting or which are a string prefix of another repo name; confirm.
repo_refs = dict((reprefix(repo, 'refs', _), backup_refs[_])
for _ in refs if _.startswith(repo))
# make sure tag/tree/blob objects represented as commits are
# present, before we generate pack for restored repo.
# ( such objects could be lost e.g. after backup repo repack as they
# are not reachable from backup repo HEAD )
for _ in repo_refs.values():
if _.sha1 != _.sha1_:
obj_recreate_from_commit(_.sha1_)
# extract pack for that repo from big backup pack + decoded tags
pack_argv = ['pack-objects',
'--revs', # include all objects referencable from input sha1 list
'--reuse-object', '--reuse-delta', '--delta-base-offset']
if not verbose:
pack_argv += ['-q']
pack_argv += ['%s/objects/pack/pack' % repopath]
# set() - several refs can point to the same sha1
repo_sha1_heads = '\n'.join(set(_.sha1 for _ in repo_refs.values()))
xgit(*pack_argv, stdin=repo_sha1_heads, stderr=gitprogress())
# verify that extracted repo refs match backup.refs index after extraction
x_ref_list = xgit('--git-dir=%s' % repopath,
'for-each-ref', '--format=%(objectname) %(refname)')
repo_ref_list = '\n'.join( ['%s %s' % (repo_refs[_].sha1, _)
for _ in sorted(repo_refs)] )
if x_ref_list != repo_ref_list:
raise RuntimeError('E: extracted repository refs corrupt')
# check connectivity in recreated repository.
#
# This way we verify that extracted pack indeed contains all
# objects for all refs in the repo.
#
# Compared to fsck we do not re-compute sha1 sum of objects which
# is significantly faster.
try:
xgit('--git-dir=%s' % repopath,
'rev-list', '--objects', '--stdin', '--quiet', stdin=repo_sha1_heads)
except:
# only add a hint about where the problem happened; the error itself is re-raised
print >>sys.stderr, 'E: Problem while checking connectivity of extracted repo:'
raise
# XXX disabled because it is slow
# # NOTE progress goes to stderr, problems go to stdout
# xgit('--git-dir=%s' % repopath, 'fsck',
# # only check that traversal from refs is ok: this unpacks
# # commits and trees and verifies blob objects are there,
# # but do _not_ unpack blobs =fast.
# '--connectivity-only',
# stdout=gitprogress(), stderr=gitprogress())
# subcommand name -> function implementing it
# (main() calls the function with arguments following the subcommand name)
commands = {
'pull': cmd_pull,
'restore': cmd_restore,
}
# Print top-level usage text of git-backup to file object f.
def usage(f):
print >>f, \
"""git-backup [options] <command>
pull pull git-repositories and files to backup
restore restore git-repositories and files from backup
common options:
-h --help this help text.
-v --verbose increase verbosity.
-q --quiet decrease verbosity.
"""
# Program entry point: parse common options, then dispatch to the subcommand.
#
# Every -v / -q increments / decrements the global verbosity level.
# The process exits with status 1 on invalid options, on missing command and
# on unknown command.
def main():
global verbose
try:
optv, argv = getopt(sys.argv[1:], "hvq", ["help", "verbose", "quiet"])
except GetoptError as e:
print(e)
usage(sys.stderr)
exit(1)
for opt, arg in optv:
if opt in ("-h", "--help"):
usage(sys.stdout)
exit(0)
elif opt in ("-v", "--verbose"):
verbose += 1
elif opt in ("-q", "--quiet"):
verbose -= 1
# argv[0] is the command; the rest are its arguments
if not argv:
usage(sys.stderr)
exit(1)
cmd = commands.get(argv[0])
if not cmd:
print >>sys.stderr, "E: unknown command %r" % argv[0]
exit(1)
cmd(argv[1:])
# run main() only when executed as a script
if __name__ == '__main__':
main()
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
/*
Git-backup - Backup set of Git repositories & just files; efficiently
This program backs up files and a set of bare Git repositories into one Git repository.
Files are copied to blobs and then added to the tree under a certain place, and for
Git repositories, all reachable objects are pulled in while maintaining an index
which remembers reference -> sha1 for every pulled repository.
After objects from backuped Git repositories are pulled in, we create new
commit which references tree with changed backup index and files, and also has
all head objects from pulled-in repositories in its parents(*). This way backup
has history and all pulled objects become reachable from single head commit in
backup repository. In particular that means that the whole state of backup can
be described with only single sha1, and that backup repository itself could be
synchronized via standard git pull/push, be repacked, etc.
Restoration process is the opposite - from a particular backup state, files are
extracted at a proper place, and for Git repositories a pack with all objects
reachable from that repository heads is prepared and extracted from backup
repository object database.
This approach allows to leverage Git's good ability for object contents
deduplication and packing, especially for cases when there are many hosted
repositories which are forks of each other with relatively minor changes in
between each other and over time, and mostly common base. In author experience
the size of backup is dramatically smaller compared to straightforward "let's
tar it all" approach.
Data for all backuped files and repositories can be accessed if one has access
to backup repository, so either they all should be in the same security domain,
or extra care has to be taken to protect access to backup repository.
File permissions are not managed with strict details due to inherent
nature of Git. This aspect can be improved with e.g. etckeeper-like
(http://etckeeper.branchable.com/) approach if needed.
Please see README.rst with user-level overview on how to use git-backup.
NOTE the idea of pulling all refs together is similar to git-namespaces
http://git-scm.com/docs/gitnamespaces
(*) Tag objects are handled specially - because in a lot of places Git insists and
assumes commit parents can only be commit objects. We encode tag objects in
specially-crafted commit object on pull, and decode back on backup restore.
We do likewise if a ref points to tree or blob, which is valid in Git.
*/
package main
import (
"flag"
"fmt"
"os"
pathpkg "path"
"path/filepath"
"runtime/debug"
"sort"
"strings"
"syscall"
"time"
)
// verbose is the global verbosity level:
//
//	0 - silent
//	1 - info
//	2 - progress of long-running operations
//	3 - debug
var verbose = 1

// infof prints a formatted message followed by a newline to stdout.
//
// Nothing is printed when the verbosity level is 0 or less.
func infof(format string, a ...interface{}) {
	if verbose <= 0 {
		return
	}
	fmt.Printf(format, a...)
	fmt.Print("\n")
}
// gitprogress tells what to pass to a git subprocess as stdout/stderr
// redirection, depending on the verbosity level:
//
//	DontRedirect - no redirection        (verbose > 1)
//	PIPE         - output comes to us    (otherwise)
func gitprogress() StdioRedirect {
	if verbose <= 1 {
		return PIPE
	}
	return DontRedirect
}
// debugf prints a formatted message followed by a newline to stdout,
// but only at debug verbosity (verbose > 2).
func debugf(format string, a ...interface{}) {
	if verbose <= 2 {
		return
	}
	fmt.Printf(format, a...)
	fmt.Print("\n")
}
// -------- git operations (like create/extract blob, commit tree ...) --------
// file -> blob_sha1, mode
//
// file_to_blob stores the file at path as a blob via `git hash-object -w` and
// returns the blob sha1 together with the file's native OS mode, as reported
// by lstat(2). For a symbolic link the blob content is the link target.
//
// Errors are reported via raise/raiseif.
func file_to_blob(path string) (Sha1, uint32) {
	argv := []string{"hash-object", "-w", "--no-filters"}
	stdin := ""

	// because we want to pass mode to outside world (to e.g. `git update-index`)
	// we need to get native OS mode, not translated one as os.Lstat() would give us.
	var st syscall.Stat_t
	err := syscall.Lstat(path, &st)
	if err != nil {
		raise(&os.PathError{Op: "lstat", Path: path, Err: err})
	}

	// NOTE Stat_t.Mode type is OS-dependent (uint32 on linux, uint16 on e.g.
	// darwin) - convert explicitly so that the code builds everywhere.
	mode := uint32(st.Mode)

	if mode&syscall.S_IFMT == syscall.S_IFLNK {
		// git hash-object does not handle symlinks
		argv = append(argv, "--stdin")
		stdin, err = os.Readlink(path)
		raiseif(err)
	} else {
		argv = append(argv, "--", path)
		// stdin = "" already
	}

	blob_sha1 := xgit2Sha1(argv, RunWith{stdin: stdin})
	return blob_sha1, mode
}
// blob_sha1, mode -> file
//
// blob_to_file extracts blob blob_sha1 to path, creating parent directories
// as needed. mode is native OS mode: if it says "symbolic link", a symlink
// pointing to blob content is created; otherwise a file with blob content
// and that mode is written.
func blob_to_file(blob_sha1 Sha1, mode uint32, path string) {
	content := xgit("cat-file", "blob", blob_sha1, RunWith{raw: true})

	raiseif(os.MkdirAll(pathpkg.Dir(path), 0777))

	is_symlink := (mode&syscall.S_IFMT == syscall.S_IFLNK)

	var err error
	if is_symlink {
		err = os.Symlink(content, path)
	} else {
		// NOTE mode is native - we cannot use ioutil.WriteFile() directly
		err = writefile(path, Bytes(content), mode)
	}
	raiseif(err)
}
// tree_empty caches the result of mktree_empty (null until the first call).
var tree_empty Sha1

// create empty git tree -> tree sha1
//
// `git mktree` is run only on the first call; later calls return the
// remembered sha1.
func mktree_empty() Sha1 {
	if !tree_empty.IsNull() {
		return tree_empty
	}
	tree_empty = xgitSha1("mktree", RunWith{stdin: ""})
	return tree_empty
}
// AuthorInfo is name/email/date identity used for commit author or committer.
// An empty field means "do not override what git takes from the environment".
type AuthorInfo struct {
	name, email, date string
}

// `git commit-tree` -> commit_sha1, raise on error
//
// The commit is created for tree with given parents and message. Non-empty
// author / committer fields are passed to git via GIT_AUTHOR_* and
// GIT_COMMITTER_* environment variables on top of the current process
// environment.
func xcommit_tree2(tree Sha1, parents []Sha1, msg string, author AuthorInfo, committer AuthorInfo) Sha1 {
	argv := make([]string, 0, 2+2*len(parents))
	argv = append(argv, "commit-tree", tree.String())
	for i := range parents {
		argv = append(argv, "-p", parents[i].String())
	}

	// env []string -> {}
	environ := os.Environ()
	env := make(map[string]string, len(environ))
	for _, entry := range environ {
		kv := strings.SplitN(entry, "=", 2)
		if len(kv) != 2 {
			panic(fmt.Errorf("E: env variable format invalid: %q", entry))
		}
		key, value := kv[0], kv[1]
		if _, dup := env[key]; dup {
			panic(fmt.Errorf("E: env has duplicate entry for %q", key))
		}
		env[key] = value
	}

	// only explicitly set fields override the environment
	overridev := [...]struct{ key, value string }{
		{"GIT_AUTHOR_NAME", author.name},
		{"GIT_AUTHOR_EMAIL", author.email},
		{"GIT_AUTHOR_DATE", author.date},
		{"GIT_COMMITTER_NAME", committer.name},
		{"GIT_COMMITTER_EMAIL", committer.email},
		{"GIT_COMMITTER_DATE", committer.date},
	}
	for _, o := range overridev {
		if o.value != "" {
			env[o.key] = o.value
		}
	}

	return xgit2Sha1(argv, RunWith{stdin: msg, env: env})
}
// xcommit_tree is xcommit_tree2 with no author/committer override, so the
// identity and dates are whatever git takes from the environment.
func xcommit_tree(tree Sha1, parents []Sha1, msg string) Sha1 {
	var unset AuthorInfo // all fields empty
	return xcommit_tree2(tree, parents, msg, unset, unset)
}
// -------- tags representation --------
// represent tag/tree/blob as specially crafted commit
//
// The reason we do this is that we want refs/tag/* to be parents of synthetic
// backup commit, but git does not allow tag objects to be in commit parents.
// Also besides commit and tag, it is possible for a ref to point to a tree or blob.
//
// We always attach original tagged object to crafted commit in one way or
// another, so that on backup restore we only have to recreate original tag
// object and tagged object is kept there in repo thanks to it being reachable
// through created commit.
//
// obj_type is the type of sha1 object; if it is "", the type is queried from
// git. Object types other than tag/tree/blob are rejected via raisef.
var tag_tree_blob = StrSet{"tag": {}, "tree": {}, "blob": {}}

func obj_represent_as_commit(sha1 Sha1, obj_type string) Sha1 {
	if obj_type == "" {
		obj_type = xgit("cat-file", "-t", sha1)
	}
	if !tag_tree_blob.Contains(obj_type) {
		raisef("%s (%s): cannot encode as commit", sha1, obj_type)
	}

	// first line in commit msg = object type
	obj_encoded := obj_type + "\n"

	// below the code layout is mainly for tag type, and we hook tree and blob
	// types handling into that layout:
	// for tree/blob we only care that object stays reachable
	tagged_type, tagged_sha1 := obj_type, sha1
	if obj_type == "tag" {
		tag, tag_raw := xload_tag(sha1)
		tagged_type, tagged_sha1 = tag.tagged_type, tag.tagged_sha1
		obj_encoded += tag_raw
	}

	// all commits we do here - we do with fixed name/date, so transformation
	// tag->commit is stable wrt git environment and time change
	fixed := AuthorInfo{name: "Git backup", email: "git@backup.org", date: "@0 +0000"}
	encode := func(tree Sha1, parents []Sha1) Sha1 {
		return xcommit_tree2(tree, parents, obj_encoded, fixed, fixed)
	}

	switch tagged_type {
	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> ø
	// Commit             .parent -> Commit
	case "commit":
		return encode(mktree_empty(), []Sha1{tagged_sha1})

	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> Tree
	// Tree               .parent -> ø
	case "tree":
		return encode(tagged_sha1, []Sha1{})

	// Tag        ~>     Commit*
	//  |                 .msg:      Tag
	//  v                 .tree   -> Tree* "tagged" -> Blob
	// Blob               .parent -> ø
	case "blob":
		tree_entry := fmt.Sprintf("100644 blob %s\ttagged\n", tagged_sha1)
		tree_for_blob := xgitSha1("mktree", RunWith{stdin: tree_entry})
		return encode(tree_for_blob, []Sha1{})

	// Tag₂       ~>     Commit₂*
	//  |                 .msg:      Tag₂
	//  v                 .tree   -> ø
	// Tag₁               .parent -> Commit₁*
	case "tag":
		commit1 := obj_represent_as_commit(tagged_sha1, tagged_type)
		return encode(mktree_empty(), []Sha1{commit1})
	}

	raisef("%s (%q): unknown tagged type", sha1, tagged_type)
	panic(0)
}
// recreate tag/tree/blob from specially crafted commit
// (see obj_represent_as_commit() about how objects are originally translated into commit)
//
// Problems are raised as *RecreateObjError carrying commit_sha1.
func obj_recreate_from_commit(commit_sha1 Sha1) {
	xraise := func(info interface{}) { raise(&RecreateObjError{commit_sha1, info}) }
	xraisef := func(f string, a ...interface{}) { xraise(fmt.Sprintf(f, a...)) }

	commit, _ := xload_commit(commit_sha1)
	if nparent := len(commit.parentv); nparent > 1 {
		xraise(">1 parents")
	}

	// commit message = object type "\n" raw encoded object
	obj_type, obj_raw, err := headtail(commit.msg, "\n")
	if err != nil {
		xraise("invalid encoded format")
	}
	if !tag_tree_blob.Contains(obj_type) {
		xraisef("unexpected encoded object type %q", obj_type)
	}

	switch obj_type {
	case "tree", "blob":
		// for tree/blob we do not need to do anything - that objects were
		// reachable from commit and are present in git db.
		return
	}

	// re-create tag object
	tag_sha1 := xgitSha1("hash-object", "-t", "tag", "-w", "--stdin", RunWith{stdin: obj_raw})

	// the original tagged object should be already in repository, because we
	// always attach it to encoding commit one way or another,
	// except we need to recurse, if it was Tag₂->Tag₁
	tag, err := tag_parse(obj_raw)
	if err != nil {
		xraisef("encoded tag: %s", err)
	}
	if tag.tagged_type == "tag" {
		if len(commit.parentv) == 0 {
			xraise("encoded tag corrupt (tagged is tag but []parent is empty)")
		}
		obj_recreate_from_commit(commit.parentv[0])
	}

	// verify consistency via re-encoding tag again
	reencoded := obj_represent_as_commit(tag_sha1, "tag")
	if reencoded != commit_sha1 {
		xraisef("encoded tag corrupt (reencoded as %s)", reencoded)
	}
}
// RecreateObjError is the error raised when an object cannot be recreated
// from its encoding commit.
type RecreateObjError struct {
	commit_sha1 Sha1        // the encoding commit
	info        interface{} // what went wrong
}

// Error implements error: "commit <sha1>: <info>".
func (e *RecreateObjError) Error() string {
	where := fmt.Sprintf("commit %s", e.commit_sha1)
	what := fmt.Sprintf("%s", e.info)
	return where + ": " + what
}
// -------- git-backup pull --------
// cmd_pull_usage prints usage text of the `pull` subcommand to stderr.
func cmd_pull_usage() {
	const text = `git-backup pull <dir1>:<prefix1> <dir2>:<prefix2> ...
Pull bare Git repositories & just files from dir1 into backup prefix1,
from dir2 into backup prefix2, etc...
`
	fmt.Fprintf(os.Stderr, "%s", text)
}
// PullSpec is one <dir>:<prefix> argument of `git-backup pull`.
type PullSpec struct {
	dir    string // directory to pull from
	prefix string // prefix in the backup to pull into
}
// cmd_pull is the entry point of the `pull` subcommand.
//
// argv is <dir1>:<prefix1> <dir2>:<prefix2> ... ; at least one pullspec is
// required. On invalid usage the usage text is printed and the process exits
// with status 1. The actual work is done by cmd_pull_.
func cmd_pull(argv []string) {
	flags := flag.FlagSet{Usage: cmd_pull_usage}
	flags.Init("", flag.ExitOnError)
	flags.Parse(argv)

	specargv := flags.Args()
	if len(specargv) == 0 {
		cmd_pull_usage()
		os.Exit(1)
	}

	pullspecv := make([]PullSpec, 0, len(specargv))
	for i := range specargv {
		dir, prefix, err := split2(specargv[i], ":")
		if err != nil {
			fmt.Fprintf(os.Stderr, "E: invalid pullspec %q\n", specargv[i])
			cmd_pull_usage()
			os.Exit(1)
		}
		pullspecv = append(pullspecv, PullSpec{dir, prefix})
	}

	cmd_pull_(pullspecv)
}
// Ref is info about a reference: its name and the sha1 it points to.
type Ref struct {
	ref  string // reference name
	sha1 Sha1   // what the reference points to
}
// cmd_pull_ pulls files and git repositories specified by pullspecv into the
// backup repository and records the result as a new commit on HEAD.
//
// Steps:
//
//  1. take the backup lock (refs/backup.locked) and make sure HEAD exists;
//  2. for every pullspec walk its directory: plain files are added to the
//     index as blobs, and for every *.git directory all refs are fetched
//     into a temporary refs/backup/<time>/ namespace;
//  3. build backup.refs from the fetched refs and create a commit which has
//     previous HEAD and all fetched heads as parents (tag/tree/blob heads are
//     first converted to commits);
//  4. delete the temporary refs, update the working copy if there is one,
//     and release the lock.
//
// Errors are raised. NOTE the lock is released only on success.
func cmd_pull_(pullspecv []PullSpec) {
	// while pulling, we'll keep refs from all pulled repositories under temp
	// unique work refs namespace.
	backup_time := time.Now().Format("20060102-1504")               // %Y%m%d-%H%M
	backup_refs_work := fmt.Sprintf("refs/backup/%s/", backup_time) // refs/backup/20150820-2109/
	backup_lock := "refs/backup.locked"

	// make sure another `git-backup pull` is not running
	xgit("update-ref", backup_lock, mktree_empty(), Sha1{})

	// make sure there is root commit
	gerr, _, _ := git("rev-parse", "--verify", "HEAD")
	if gerr != nil {
		infof("# creating root commit")
		// NOTE `git commit` does not work in bare repo - do commit by hand
		commit := xcommit_tree(mktree_empty(), []Sha1{}, "Initialize git-backup repository")
		xgit("update-ref", "-m", "git-backup pull init", "HEAD", commit)
	}

	// walk over specified dirs, pulling objects from git and adding non-git-object files
	for _, pullspec := range pullspecv {
		dir, prefix := pullspec.dir, pullspec.prefix

		// make sure index is empty for prefix (so that we start from clean
		// prefix namespace and this way won't leave stale removed things)
		xgit("rm", "--cached", "-r", "--ignore-unmatch", "--", prefix)

		here := myfuncname()
		err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) (errout error) {
			// any error -> stop
			if err != nil {
				return err
			}

			// propagate exceptions properly via filepath.Walk as errors with calling context
			// (filepath is not our code)
			defer errcatch(func(e *Error) {
				errout = erraddcallingcontext(here, e)
			})

			// files -> add directly to index to commit later
			if !info.IsDir() {
				infof("# file %s\t<- %s", prefix, path)
				blob, mode := file_to_blob(path)
				xgit("update-index", "--add", "--cacheinfo",
					fmt.Sprintf("%o,%s,%s", mode, blob, reprefix(dir, prefix, path)))
				return nil
			}

			// directories -> look for *.git and handle git object specially.

			// do not recurse into *.git/objects/  - we'll save them specially
			if strings.HasSuffix(path, ".git/objects") {
				return filepath.SkipDir
			}

			// else we recurse, but handle *.git specially - via fetching objects from it
			if !strings.HasSuffix(path, ".git") {
				return nil
			}

			// git repo - let's pull all refs from it to our backup refs namespace
			infof("# git  %s\t<- %s", prefix, path)
			// NOTE --no-tags : do not try to autoextend commit -> covering tag
			// NOTE fetch.fsckObjects=true : check objects for corruption as they are fetched
			xgit("-c", "fetch.fsckObjects=true",
				"fetch", "--no-tags", path,
				fmt.Sprintf("refs/*:%s%s/*", backup_refs_work,
					// NOTE repo name is escaped as it can contain e.g. spaces, and refs must not
					path_refescape(reprefix(dir, prefix, path))),
				// TODO do not show which ref we pulled - show only pack transfer progress
				RunWith{stderr: gitprogress()})

			// XXX do we want to do full fsck of source git repo on pull as well ?

			return nil
		})

		// re-raise / raise error after Walk
		if err != nil {
			e := aserror(err)
			e = erraddcontext(e, "pulling from "+dir)
			raise(e)
		}
	}

	// all refs from all found git repositories populated.
	// now prepare manifest with ref -> sha1 and do a synthetic commit merging all that sha1
	// (so they become all reachable from HEAD -> survive repack and be transferable on git pull)
	//
	// NOTE we handle tag/tree/blob objects specially - because these objects cannot
	// be in commit parents, we convert them to specially-crafted commits and use them.
	// The commits prepared contain full info how to restore original objects.

	// backup.refs format:
	//
	//   1eeb0324 <prefix>/wendelin.core.git/heads/master
	//   213a9243 <prefix>/wendelin.core.git/tags/v0.4 <213a9243-converted-to-commit>
	//   ...
	//
	// NOTE `git for-each-ref` sorts output by ref
	//      -> backup_refs is sorted and stable between runs
	backup_refs_dump := xgit("for-each-ref", backup_refs_work)

	// NOTE strings.Split("", "\n") gives [""], not [] - so an empty dump has
	// to be handled explicitly. The dump is empty when no refs were fetched,
	// e.g. when only plain files are backed up.
	backup_refs_dumpv := []string{}
	if backup_refs_dump != "" {
		backup_refs_dumpv = strings.Split(backup_refs_dump, "\n")
	}

	backup_refs_list := []Ref{}       // parsed dump
	backup_refsv := []string{}        // backup.refs content
	backup_refs_parents := Sha1Set{}  // sha1 for commit parents, obtained from refs
	noncommit_seen := map[Sha1]Sha1{} // {} sha1 -> sha1_  (there are many duplicate tags)
	for _, entry := range backup_refs_dumpv {
		sha1, type_, ref := Sha1{}, "", ""
		_, err := fmt.Sscanf(entry, "%s %s %s\n", &sha1, &type_, &ref)
		if err != nil {
			raisef("%s: strange for-each-ref entry %q", backup_refs_work, entry)
		}
		backup_refs_list = append(backup_refs_list, Ref{ref, sha1})
		backup_refs_entry := fmt.Sprintf("%s %s", sha1, strip_prefix(backup_refs_work, ref))

		// represent tag/tree/blob as specially crafted commit, because we
		// cannot use it as commit parent.
		sha1_ := sha1
		if type_ != "commit" {
			//infof("obj_as_commit %s  %s\t%s", sha1, type_, ref)  XXX
			var seen bool
			sha1_, seen = noncommit_seen[sha1]
			if !seen {
				sha1_ = obj_represent_as_commit(sha1, type_)
				noncommit_seen[sha1] = sha1_
			}

			backup_refs_entry += fmt.Sprintf(" %s", sha1_)
		}

		backup_refsv = append(backup_refsv, backup_refs_entry)

		if !backup_refs_parents.Contains(sha1_) { // several refs can refer to the same sha1
			backup_refs_parents.Add(sha1_)
		}
	}

	backup_refs := strings.Join(backup_refsv, "\n")
	backup_refs_parentv := backup_refs_parents.Elements()
	sort.Sort(BySha1(backup_refs_parentv)) // so parents order is stable in between runs

	// backup_refs -> blob
	backup_refs_sha1 := xgitSha1("hash-object", "-w", "--stdin", RunWith{stdin: backup_refs})
	// add backup_refs blob to index
	xgit("update-index", "--add", "--cacheinfo", fmt.Sprintf("100644,%s,backup.refs", backup_refs_sha1))

	// index is ready - prepare tree and commit
	backup_tree_sha1 := xgitSha1("write-tree")
	HEAD := xgitSha1("rev-parse", "HEAD")
	commit_sha1 := xcommit_tree(backup_tree_sha1, append([]Sha1{HEAD}, backup_refs_parentv...),
		"Git-backup "+backup_time)

	xgit("update-ref", "-m", "git-backup pull", "HEAD", commit_sha1, HEAD)

	// remove no-longer needed backup refs & verify they don't stay
	// FIXME `delete` deletes only files, but leaves empty dirs around.
	//       more important: this affect performance of future `git-backup pull` run a *LOT*
	//
	//       reason is: `git pull` first check local refs, and for doing so it
	//       recourse into all directories, even empty ones.
	//
	//       https://lab.nexedi.com/lab.nexedi.com/lab.nexedi.com/issues/4
	//
	//       -> TODO also remove empty directories.
	//
	// NOTE collect + Join instead of += : there can be a lot of refs
	backup_refs_deletev := make([]string, 0, len(backup_refs_list))
	for _, r := range backup_refs_list {
		backup_refs_deletev = append(backup_refs_deletev, fmt.Sprintf("delete %s %s\n", r.ref, r.sha1))
	}
	backup_refs_delete := strings.Join(backup_refs_deletev, "")

	xgit("update-ref", "--stdin", RunWith{stdin: backup_refs_delete})
	refs_left := xgit("for-each-ref", backup_refs_work)
	if refs_left != "" {
		raisef("Backup refs under %s not deleted properly", backup_refs_work)
	}

	// if we have working copy - update it
	bare := xgit("rev-parse", "--is-bare-repository")
	if bare != "true" {
		// `git checkout-index -af`  -- does not delete deleted files
		// `git read-tree -v -u --reset HEAD~ HEAD`  -- needs index matching
		// original worktree to properly work, but we already have updated index
		//
		// so we get changes we committed as diff and apply to worktree
		diff := xgit("diff", "--binary", HEAD, "HEAD", RunWith{raw: true})
		if diff != "" {
			diffstat := xgit("apply", "--stat", "--apply", "--binary", "--whitespace=nowarn",
				RunWith{stdin: diff, raw: true})
			infof("%s", diffstat)
		}
	}

	// we are done - unlock
	xgit("update-ref", "-d", backup_lock)
}
// -------- git-backup restore --------
// cmd_restore_usage prints usage text of the `restore` subcommand to stderr.
func cmd_restore_usage() {
	const text = `git-backup restore <commit-ish> <prefix1>:<dir1> <prefix2>:<dir2> ...
Restore Git repositories & just files from backup prefix1 into dir1,
from backup prefix2 into dir2, etc...
Backup state to restore is taken from <commit-ish>.
`
	fmt.Fprintf(os.Stderr, "%s", text)
}
// RestoreSpec is one <prefix>:<dir> argument of `git-backup restore`.
type RestoreSpec struct {
	prefix string // prefix in the backup to restore from
	dir    string // directory to restore into
}
// cmd_restore is the entry point of the `restore` subcommand.
//
// argv is <commit-ish> <prefix1>:<dir1> <prefix2>:<dir2> ... ; the commit-ish
// and at least one restorespec are required. On invalid usage the usage text
// is printed and the process exits with status 1. The actual work is done by
// cmd_restore_.
func cmd_restore(argv []string) {
	flags := flag.FlagSet{Usage: cmd_restore_usage}
	flags.Init("", flag.ExitOnError)
	flags.Parse(argv)

	args := flags.Args()
	if len(args) < 2 {
		cmd_restore_usage()
		os.Exit(1)
	}
	HEAD, specargv := args[0], args[1:]

	restorespecv := make([]RestoreSpec, 0, len(specargv))
	for i := range specargv {
		prefix, dir, err := split2(specargv[i], ":")
		if err != nil {
			fmt.Fprintf(os.Stderr, "E: invalid restorespec %q\n", specargv[i])
			cmd_restore_usage()
			os.Exit(1)
		}
		restorespecv = append(restorespecv, RestoreSpec{prefix, dir})
	}

	cmd_restore_(HEAD, restorespecv)
}
// reporef_split splits "<escaped repo path>/<ref>" entry into repository path
// (unescaped) and ref:
//
// kirr/wendelin.core.git/heads/master -> kirr/wendelin.core.git, heads/master
// tiwariayush/Discussion%20Forum%20.git/... -> tiwariayush/Discussion Forum .git, ...
//
// The split point is the first ".git/". It is an error (raised) if reporef
// has no ".git/" in it or if the repository part cannot be unescaped.
func reporef_split(reporef string) (repo, ref string) {
	const dotgit, sep = ".git", "/"

	i := strings.Index(reporef, dotgit+sep)
	if i == -1 {
		raisef("E: %s is not a ref for a git repo", reporef)
	}

	repo_end := i + len(dotgit) // ".git" stays in repo, "/" after it is dropped
	repo_escaped := reporef[:repo_end]
	ref = reporef[repo_end+len(sep):]

	// unescape repo name we originally escaped when making backup
	repo, err := path_refunescape(repo_escaped)
	raiseif(err)
	return repo, ref
}
type (
	// BackupRefSha1 is sha1 value(s) for a ref in 'backup.refs'
	BackupRefSha1 struct {
		// original sha1 this ref was pointing to in original repo
		sha1 Sha1

		// sha1 actually used to represent sha1's object in backup repo
		// (for tag/tree/blob - they are converted to commits)
		sha1_ Sha1
	}

	// BackupRef is ref entry in 'backup.refs' (repo prefix stripped)
	BackupRef struct {
		refname string // ref without "refs/" prefix
		BackupRefSha1
	}

	// RefMap is {} refname -> sha1, sha1_
	RefMap map[string]BackupRefSha1

	// BackupRepo is info about a repository from backup.refs
	BackupRepo struct {
		repopath string // full repo path with backup prefix
		refs     RefMap
	}
)
// Values returns all RefMap entries as flat []BackupRef.
//
// The order of returned entries is unspecified (it follows map iteration).
func (m RefMap) Values() []BackupRef {
	ev := make([]BackupRef, len(m))
	i := 0
	for refname, refsha1 := range m {
		ev[i] = BackupRef{refname, refsha1}
		i++
	}
	return ev
}
// ByRefname implements sort.Interface for sorting []BackupRef by refname.
type ByRefname []BackupRef

func (br ByRefname) Len() int           { return len(br) }
func (br ByRefname) Swap(i, j int)      { br[j], br[i] = br[i], br[j] }
func (br ByRefname) Less(i, j int) bool { return br[i].refname < br[j].refname }
// Sha1Heads returns all sha1 heads RefMap points to, in sorted order.
//
// Original (.sha1) values are used, and every sha1 is returned only once
// even if several refs point to it.
func (m RefMap) Sha1Heads() []Sha1 {
	uniq := Sha1Set{}
	for refname := range m {
		uniq.Add(m[refname].sha1)
	}

	headv := uniq.Elements()
	sort.Sort(BySha1(headv))
	return headv
}
// Sha1HeadsStr is like Sha1Heads() but returns heads in text format, with
// every head followed by "\n". The result is "" if there are no heads.
func (m RefMap) Sha1HeadsStr() string {
	var text []byte
	for _, head := range m.Sha1Heads() {
		text = append(text, head.String()...)
		text = append(text, '\n')
	}
	return string(text)
}
// ByRepoPath implements sort.Interface for sorting []*BackupRepo by repopath.
type ByRepoPath []*BackupRepo

func (br ByRepoPath) Len() int           { return len(br) }
func (br ByRepoPath) Swap(i, j int)      { br[j], br[i] = br[i], br[j] }
func (br ByRepoPath) Less(i, j int) bool { return br[i].repopath < br[j].repopath }

// Search is for searching sorted []*BackupRepo by repopath prefix.
//
// It returns the index of the first repository whose repopath is >= prefix,
// or len(br) if there is no such repository. br must be sorted by repopath.
func (br ByRepoPath) Search(prefix string) int {
	notbefore := func(i int) bool {
		return br[i].repopath >= prefix
	}
	return sort.Search(len(br), notbefore)
}
// cmd_restore_ restores files and git repositories from backup state HEAD_.
//
// HEAD_ is a commit-ish in the backup repository. For every restorespec
// (prefix, dir):
//
//  1. dir is created - it must not exist before the restore run;
//  2. all blobs under prefix in the backup tree are extracted as files;
//  3. for every repository from backup.refs whose path starts with prefix, a
//     pack with all objects reachable from repository heads is generated
//     into <repo>/objects/pack/, and then the extracted refs and object
//     connectivity are verified.
//
// Errors are raised.
func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) {
	HEAD := xgitSha1("rev-parse", "--verify", HEAD_)

	// read backup refs index
	repotab := map[string]*BackupRepo{} // repo.path -> repo
	backup_refs := xgit("cat-file", "blob", fmt.Sprintf("%s:backup.refs", HEAD))

	// NOTE strings.Split("", "\n") gives [""], not [] - so empty backup.refs
	// has to be handled explicitly. backup.refs is empty when the backup
	// contains only plain files and no git repositories.
	refentryv_all := []string{}
	if backup_refs != "" {
		refentryv_all = strings.Split(backup_refs, "\n")
	}

	for _, refentry := range refentryv_all {
		// sha1 prefix+refname (sha1_)
		badentry := func() { raisef("E: invalid backup.refs entry: %q", refentry) }
		refentryv := strings.Fields(refentry)
		if !(2 <= len(refentryv) && len(refentryv) <= 3) {
			badentry()
		}
		sha1, err := Sha1Parse(refentryv[0])
		sha1_, err_ := sha1, err
		if len(refentryv) == 3 {
			sha1_, err_ = Sha1Parse(refentryv[2])
		}
		if err != nil || err_ != nil {
			badentry()
		}
		reporef := refentryv[1]
		repopath, ref := reporef_split(reporef)

		repo := repotab[repopath]
		if repo == nil {
			repo = &BackupRepo{repopath, RefMap{}}
			repotab[repopath] = repo
		}

		if _, alreadyin := repo.refs[ref]; alreadyin {
			raisef("E: duplicate ref %s in backup.refs", reporef)
		}
		repo.refs[ref] = BackupRefSha1{sha1, sha1_}
	}

	// flattened & sorted repotab
	// NOTE sorted - to process repos always in the same order & for searching
	repov := make([]*BackupRepo, 0, len(repotab))
	for _, repo := range repotab {
		repov = append(repov, repo)
	}
	sort.Sort(ByRepoPath(repov))

	// repotab no longer needed
	repotab = nil

	// walk over specified prefixes restoring files and packs in *.git
	for _, restorespec := range restorespecv {
		prefix, dir := restorespec.prefix, restorespec.dir

		// ensure dir did not exist before restore run
		err := os.Mkdir(dir, 0777)
		raiseif(err)

		// files
		lstree := xgit("ls-tree", "--full-tree", "-r", "-z", "--", HEAD, prefix, RunWith{raw: true})
		repos_seen := StrSet{} // dirs of *.git seen while restoring files
		for _, entry := range strings.Split(lstree, "\x00") {
			if entry == "" {
				continue // last empty line after last \0
			}
			mode, type_, sha1, filename, err := parse_lstree_entry(entry)
			// NOTE
			//  - `ls-tree -r` shows only leaf objects
			//  - git-backup repository does not have submodules and the like
			// -> type should be "blob" only
			if err != nil || type_ != "blob" {
				raisef("%s: invalid/unexpected ls-tree entry %q", HEAD, entry)
			}

			filename = reprefix(prefix, dir, filename)
			infof("# file %s\t-> %s", prefix, filename)
			blob_to_file(sha1, mode, filename)

			// make sure git will recognize *.git as repo:
			//   - it should have refs/{heads,tags}/ and objects/pack/ inside.
			//
			// NOTE doing it while restoring files, because a repo could be
			//   empty - without refs at all, and thus next "git packs restore"
			//   step will not be run for it.
			filedir := pathpkg.Dir(filename)
			if strings.HasSuffix(filedir, ".git") && !repos_seen.Contains(filedir) {
				infof("# repo %s\t-> %s", prefix, filedir)
				for _, subdir := range []string{"refs/heads", "refs/tags", "objects/pack"} {
					err := os.MkdirAll(filedir+"/"+subdir, 0777)
					raiseif(err)
				}
				repos_seen.Add(filedir)
			}
		}

		// git packs
		//
		// NOTE(review): repositories are selected by plain string prefix, so it
		// looks like prefix "a" would also select repositories under "ab/" -
		// confirm whether that is intended.
		for i := ByRepoPath(repov).Search(prefix); i < len(repov); i++ {
			repo := repov[i]
			if !strings.HasPrefix(repo.repopath, prefix) {
				break // repov is sorted - end of repositories with prefix
			}

			repopath := reprefix(prefix, dir, repo.repopath)
			infof("# git  %s\t-> %s", prefix, repopath)

			// make sure tag/tree/blob objects represented as commits are
			// present, before we generate pack for restored repo.
			// ( such objects could be lost e.g. after backup repo repack as they
			//   are not reachable from backup repo HEAD )
			for _, refsha1 := range repo.refs {
				if refsha1.sha1 != refsha1.sha1_ {
					obj_recreate_from_commit(refsha1.sha1_)
				}
			}

			// extract pack for that repo from big backup pack + decoded tags
			pack_argv := []string{
				"pack-objects",
				"--revs", // include all objects referencable from input sha1 list
				"--reuse-object", "--reuse-delta", "--delta-base-offset"}
			if verbose <= 0 {
				pack_argv = append(pack_argv, "-q")
			}
			pack_argv = append(pack_argv, repopath+"/objects/pack/pack")

			xgit2(pack_argv, RunWith{stdin: repo.refs.Sha1HeadsStr(), stderr: gitprogress()})

			// verify that extracted repo refs match backup.refs index after extraction
			x_ref_list := xgit("--git-dir="+repopath,
				"for-each-ref", "--format=%(objectname) %(refname)")
			repo_refs := repo.refs.Values()
			sort.Sort(ByRefname(repo_refs))
			repo_ref_listv := make([]string, 0, len(repo_refs))
			for _, r := range repo_refs {
				repo_ref_listv = append(repo_ref_listv, fmt.Sprintf("%s refs/%s", r.sha1, r.refname))
			}
			repo_ref_list := strings.Join(repo_ref_listv, "\n")
			if x_ref_list != repo_ref_list {
				raisef("E: extracted %s refs corrupt", repopath)
			}

			// check connectivity in recreated repository.
			//
			// This way we verify that extracted pack indeed contains all
			// objects for all refs in the repo.
			//
			// Compared to fsck we do not re-compute sha1 sum of objects which
			// is significantly faster.
			gerr, _, _ := git("--git-dir="+repopath,
				"rev-list", "--objects", "--stdin", "--quiet", RunWith{stdin: repo.refs.Sha1HeadsStr()})
			if gerr != nil {
				fmt.Fprintln(os.Stderr, "E: Problem while checking connectivity of extracted repo:")
				raise(gerr)
			}

			// XXX disabled because it is slow
			// // NOTE progress goes to stderr, problems go to stdout
			// xgit("--git-dir=" + repopath, "fsck",
			//         # only check that traversal from refs is ok: this unpacks
			//         # commits and trees and verifies blob objects are there,
			//         # but do _not_ unpack blobs =fast.
			//         "--connectivity-only",
			//         RunWith{stdout: gitprogress(), stderr: gitprogress()})
		}
	}
}
// commands maps a subcommand name given on the command line to the function
// implementing it. main() looks the name up here and calls the function with
// the arguments that follow the subcommand name.
var commands = map[string]func([]string){
"pull": cmd_pull,
"restore": cmd_restore,
}
// usage prints the command-line help text to stderr.
// It is also installed as flag.Usage by main().
func usage() {
	const help = `git-backup [options] <command>
pull pull git-repositories and files to backup
restore restore git-repositories and files from backup
common options:
-h --help this help text.
-v increase verbosity.
-q decrease verbosity.
`
	fmt.Fprint(os.Stderr, help)
}
// main parses common options, looks up the requested subcommand in commands
// and runs it with the remaining arguments.
//
// Verbosity is the number of -v flags minus the number of -q flags.
// With no subcommand the usage text is printed; an unknown subcommand is
// reported on stderr. Both cases exit with status 1, as does any Error raised
// by the subcommand.
func main() {
	flag.Usage = usage
	quiet := 0
	flag.Var((*countFlag)(&verbose), "v", "verbosity level")
	flag.Var((*countFlag)(&quiet), "q", "decrease verbosity")
	flag.Parse()
	verbose -= quiet // every -q cancels one -v
	argv := flag.Args()

	if len(argv) == 0 {
		usage()
		os.Exit(1)
	}

	cmd := commands[argv[0]]
	if cmd == nil {
		// terminate the message with a newline so that it does not run
		// into the shell prompt / following output
		fmt.Fprintf(os.Stderr, "E: unknown command %q\n", argv[0])
		os.Exit(1)
	}

	// catch Error and report info from it
	here := myfuncname()
	defer errcatch(func(e *Error) {
		e = erraddcallingcontext(here, e)
		fmt.Fprintln(os.Stderr, e)

		// also show traceback if debug
		if verbose > 2 {
			fmt.Fprint(os.Stderr, "\n")
			debug.PrintStack()
		}

		os.Exit(1)
	})

	cmd(argv[1:])
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"syscall"
"testing"
)
// xgetcwd returns the current working directory.
// If it cannot be determined the test t is failed via t.Fatal.
func xgetcwd(t *testing.T) string {
	dir, err := os.Getwd()
	if err == nil {
		return dir
	}
	t.Fatal(err)
	return dir
}
// xchdir changes the current working directory to dir.
// On error the test t is failed via t.Fatal.
func xchdir(t *testing.T, dir string) {
	if err := os.Chdir(dir); err != nil {
		t.Fatal(err)
	}
}
// verify end-to-end pull-restore
//
// The test:
//
//   - creates a fresh bare backup repository in a temporary directory,
//   - pulls testdata/1 into it, prunes and fscks it,
//   - checks that the pulled tag objects are gone after prune,
//   - restores the backup and compares restored files and git repositories
//     with the original ones,
//   - finally checks that pulling the corrupt repository from testdata/2 raises.
//
// NOTE the test changes the current working directory of the process while it
// runs and restores it on return.
func TestPullRestore(t *testing.T) {
	// if something raises -> don't let testing panic - report it as proper error with context.
	here := myfuncname()
	defer errcatch(func(e *Error) {
		e = erraddcallingcontext(here, e)

		// add file:line for failing code inside testing function - so we have exact context to debug
		failedat := ""
		for _, f := range xtraceback(1) {
			if f.Name() == here {
				// TODO(go1.7) -> f.File, f.Line (f becomes runtime.Frame)
				file, line := f.FileLine(f.pc - 1)
				failedat = fmt.Sprintf("%s:%d", filepath.Base(file), line)
				break
			}
		}
		if failedat == "" {
			panic(fmt.Errorf("cannot lookup failedat for %s", here))
		}

		t.Errorf("%s: %v", failedat, e)
	})

	workdir, err := ioutil.TempDir("", "t-git-backup")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(workdir)

	mydir := xgetcwd(t)
	xchdir(t, workdir)
	defer xchdir(t, mydir)

	// -test.v -> verbosity of git-backup
	if testing.Verbose() {
		verbose = 1
	} else {
		verbose = 0
	}

	// init backup repository
	xgit("init", "--bare", "backup.git")
	xchdir(t, "backup.git")

	// pull from testdata
	my1 := mydir + "/testdata/1"
	cmd_pull([]string{my1 + ":b1"})

	// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
	xgit("prune")

	// verify backup repo is all ok
	xgit("fsck")

	// verify that just pulled tag objects are now gone after pruning -
	// - they become not directly git-present. The only possibility to
	// get them back is via recreating from encoded commit objects.
	tags := []string{"11e67095628aa17b03436850e690faea3006c25d",
		"ba899e5639273a6fa4d50d684af8db1ae070351e",
		"7124713e403925bc772cd252b0dec099f3ced9c5",
		"f735011c9fcece41219729a33f7876cd8791f659"}
	for _, tag := range tags {
		gerr, _, _ := git("cat-file", "-p", tag)
		if gerr == nil {
			t.Fatalf("tag %s still present in backup.git after git-prune", tag)
		}
	}

	// restore backup
	work1 := workdir + "/1"
	cmd_restore([]string{"HEAD", "b1:" + work1})

	// verify files restored to the same as original
	gerr, diff, _ := git("diff", "--no-index", "--raw", "--exit-code", my1, work1)
	// 0 - no diff, 1 - has diff, 2 - problem
	if gerr != nil && gerr.Sys().(syscall.WaitStatus).ExitStatus() > 1 {
		t.Fatal(gerr)
	}

	gitObjectsRe := regexp.MustCompile(`\.git/objects/`)
	for _, diffline := range strings.Split(diff, "\n") {
		// empty output (no differences at all) splits into a single
		// empty line - there is nothing to check in it
		if diffline == "" {
			continue
		}

		// :srcmode dstmode srcsha1 dstsha1 status\tpath
		_, path, err := headtail(diffline, "\t")
		if err != nil {
			t.Fatalf("restorecheck: cannot parse diff line %q", diffline)
		}
		// git objects can be represented differently (we check them later)
		if gitObjectsRe.FindString(path) != "" {
			continue
		}
		t.Fatal("restorecheck: unexpected diff:", diffline)
	}

	// verify git objects restored to the same as original
	err = filepath.Walk(my1, func(path string, info os.FileInfo, err error) error {
		// any error -> stop
		if err != nil {
			return err
		}

		// non *.git/ -- not interesting
		if !(info.IsDir() && strings.HasSuffix(path, ".git")) {
			return nil
		}

		// found git repo - check refs & objects in original and restored are exactly the same,
		var R = [2]struct{ path, reflist, revlist string }{
			{path: path},                       // original
			{path: reprefix(my1, work1, path)}, // restored
		}

		// NOTE iterate by index: `for _, repo := range R` would hand us a
		// copy of each element, the lists would be stored into that copy
		// and the comparisons below would always see empty strings.
		for i := range R {
			repo := &R[i]
			// fsck just in case
			xgit("--git-dir="+repo.path, "fsck")
			// NOTE for-each-ref sorts output by refname
			repo.reflist = xgit("--git-dir="+repo.path, "for-each-ref")
			// NOTE rev-list emits objects in reverse chronological order,
			// starting from refs roots which are also ordered by refname
			repo.revlist = xgit("--git-dir="+repo.path, "rev-list", "--all", "--objects")
		}

		if R[0].reflist != R[1].reflist {
			t.Fatalf("restorecheck: %q restored with different reflist (in %q)", R[0].path, R[1].path)
		}

		if R[0].revlist != R[1].revlist {
			t.Fatalf("restorecheck: %q restored with differrent objects (in %q)", R[0].path, R[1].path)
		}

		// .git verified - no need to recurse
		return filepath.SkipDir
	})

	if err != nil {
		t.Fatal(err)
	}

	// now try to pull corrupt repo - pull should refuse if transferred pack contains bad objects
	my2 := mydir + "/testdata/2"
	func() {
		defer errcatch(func(e *Error) {
			// it ok - pull should raise
		})
		cmd_pull([]string{my2 + ":b2"})
		t.Fatal("fetching from corrupt.git did not complain")
	}()
}
// TestRepoRefSplit checks that reporef_split() splits an escaped
// "<repo>.git/<ref>" string into the unescaped repository path and the ref.
func TestRepoRefSplit(t *testing.T) {
	type testcase struct{ reporef, repo, ref string }
	cases := []testcase{
		{"kirr/wendelin.core.git/heads/master", "kirr/wendelin.core.git", "heads/master"},
		{"kirr/erp5.git/backup/x/master+erp5-data-notebook", "kirr/erp5.git", "backup/x/master+erp5-data-notebook"},
		{"tiwariayush/Discussion%20Forum%20.git/...", "tiwariayush/Discussion Forum .git", "..."},
		{"tiwariayush/Discussion%20Forum+.git/...", "tiwariayush/Discussion Forum+.git", "..."},
		{"tiwariayush/Discussion%2BForum+.git/...", "tiwariayush/Discussion+Forum+.git", "..."},
	}

	for i := range cases {
		tc := &cases[i]
		gotRepo, gotRef := reporef_split(tc.reporef)
		if gotRepo == tc.repo && gotRef == tc.ref {
			continue
		}
		t.Errorf("reporef_split(%q) -> %q %q ; want %q %q", tc.reporef, gotRepo, gotRef, tc.repo, tc.ref)
	}
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Run git subprocess
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"strings"
)
// how/whether to redirect stdio of spawned process
//
// The zero value is PIPE, so a RunWith that does not mention stdout/stderr
// gets the subprocess output captured.
type StdioRedirect int
const (
PIPE StdioRedirect = iota // connect stdio channel via PIPE to parent (default value)
DontRedirect // subprocess writes directly to our own os.Stdout / os.Stderr
)
// RunWith describes how a git subprocess should be run: what is fed to its
// stdin, where its stdout/stderr go, whether captured output is stripped and
// which environment it gets.
type RunWith struct {
stdin string // text fed to subprocess stdin; "" -> cmd.Stdin is left unset
stdout StdioRedirect // PIPE | DontRedirect
stderr StdioRedirect // PIPE | DontRedirect
raw bool // !raw -> stdout, stderr are stripped
env map[string]string // !nil -> subprocess environment setup from env
}
// run `git *argv` -> error, stdout, stderr
//
// ctx tells how to run the subprocess:
//
//   - ctx.stdin, if not empty, is fed to git stdin;
//   - ctx.stdout / ctx.stderr select whether the stream is captured (PIPE)
//     or goes straight to our os.Stdout / os.Stderr (DontRedirect);
//     any other value is a programming error and panics;
//   - ctx.env, if not nil, becomes the whole environment of the subprocess;
//   - unless ctx.raw is set, surrounding whitespace is trimmed from the
//     captured stdout and stderr.
//
// err is returned exactly as reported by cmd.Run().
func _git(argv []string, ctx RunWith) (err error, stdout, stderr string) {
	debugf("git %s", strings.Join(argv, " "))

	var outBuf, errBuf bytes.Buffer
	cmd := exec.Command("git", argv...)

	if ctx.stdin != "" {
		cmd.Stdin = strings.NewReader(ctx.stdin)
	}

	if ctx.stdout == PIPE {
		cmd.Stdout = &outBuf
	} else if ctx.stdout == DontRedirect {
		cmd.Stdout = os.Stdout
	} else {
		panic("git: stdout redirect mode invalid")
	}

	if ctx.stderr == PIPE {
		cmd.Stderr = &errBuf
	} else if ctx.stderr == DontRedirect {
		cmd.Stderr = os.Stderr
	} else {
		panic("git: stderr redirect mode invalid")
	}

	if ctx.env != nil {
		environ := make([]string, 0, len(ctx.env))
		for name, value := range ctx.env {
			environ = append(environ, name+"="+value)
		}
		cmd.Env = environ
	}

	err = cmd.Run()
	stdout, stderr = String(outBuf.Bytes()), String(errBuf.Bytes())

	if ctx.raw {
		return err, stdout, stderr
	}

	// prettify output (e.g. so that 'sha1\n' becomes 'sha1' and can be used directly)
	return err, strings.TrimSpace(stdout), strings.TrimSpace(stderr)
}
// GitError is the error of a git command that could run but exited with
// failure status. It carries both the context of the invocation and the
// original *exec.ExitError.
type GitError struct {
	GitErrContext
	*exec.ExitError
}

// GitErrContext describes a git invocation: its arguments, what was fed to
// stdin and what was captured from stdout and stderr.
type GitErrContext struct {
	argv   []string
	stdin  string
	stdout string
	stderr string
}

// Error renders the invocation context; when git printed nothing on stderr
// a "(failed)" line is added so that the failure is still visible.
func (e *GitError) Error() string {
	text := e.GitErrContext.Error()
	if e.stderr != "" {
		return text
	}
	return text + "(failed)\n"
}

// Error renders the invocation in shell-like form:
//
//	git <argv...> </dev/null            (no stdin)
//	git <argv...> <<EOF ... EOF         (stdin as here-document)
//
// followed by captured stderr. The result always ends with a newline.
func (e *GitErrContext) Error() string {
	// nl returns text terminated with exactly what is needed to end in "\n"
	nl := func(text string) string {
		if strings.HasSuffix(text, "\n") {
			return text
		}
		return text + "\n"
	}

	text := "git " + strings.Join(e.argv, " ")
	if e.stdin == "" {
		text += " </dev/null\n"
	} else {
		text = nl(text+" <<EOF\n"+e.stdin) + "EOF\n"
	}

	return nl(text + e.stderr)
}
// argv -> []string, ctx (for passing argv + RunWith handy - see git() for details)
//
// Every argument is turned into a git command-line argument: strings are
// taken as is, anything else is formatted with fmt.Sprint. The only exception
// is a RunWith value - it is returned as ctx. At most one RunWith is allowed;
// a second one panics.
func _gitargv(argv ...interface{}) (argvs []string, ctx RunWith) {
	haveCtx := false
	for _, a := range argv {
		if rw, ok := a.(RunWith); ok {
			if haveCtx {
				panic("git: multiple RunWith contexts")
			}
			ctx, haveCtx = rw, true
			continue
		}

		if s, ok := a.(string); ok {
			argvs = append(argvs, s)
		} else {
			argvs = append(argvs, fmt.Sprint(a))
		}
	}
	return argvs, ctx
}
// run `git *argv` -> err, stdout, stderr
//   - arguments are automatically converted to strings
//   - RunWith argument is passed as ctx
//   - error is returned only when git command could run and exits with error status
//   - on other errors - exception is raised
//
// NOTE err is concrete *GitError, not error
func git(argv ...interface{}) (err *GitError, stdout, stderr string) {
	args, ctx := _gitargv(argv...)
	return git2(args, ctx)
}
// git2 is git() with arguments already split into argv and ctx.
//
// It runs `git argv...` and returns:
//
//   - err == nil when git exited successfully;
//   - err != nil (*GitError with invocation context) when git could run but
//     exited with error status.
//
// Any other failure reported by the run (i.e. an error that is not
// *exec.ExitError) is raised as exception.
func git2(argv []string, ctx RunWith) (err *GitError, stdout, stderr string) {
	e, stdout, stderr := _git(argv, ctx)
	eexec, _ := e.(*exec.ExitError)
	if e != nil && eexec == nil {
		// NOTE the format must have a verb for e - otherwise the cause is
		// rendered as "%!(EXTRA ...)" noise instead of the error text.
		raisef("git %s : %s", strings.Join(argv, " "), e)
	}
	if eexec != nil {
		err = &GitError{GitErrContext{argv, ctx.stdin, stdout, stderr}, eexec}
	}
	return err, stdout, stderr
}
// run `git *argv` -> stdout
// on error - raise exception
func xgit(argv ...interface{}) string {
return xgit2(_gitargv(argv...))
}
// xgit2 is xgit() with arguments already split into argv and ctx:
// it returns git stdout and raises the *GitError if git exited with error.
func xgit2(argv []string, ctx RunWith) string {
	gerr, out, _ := git2(argv, ctx)
	if gerr == nil {
		return out
	}
	raise(gerr)
	return out
}
// like xgit(), but automatically parse stdout to Sha1
func xgitSha1(argv ...interface{}) Sha1 {
	args, ctx := _gitargv(argv...)
	return xgit2Sha1(args, ctx)
}
// error when git output is not valid sha1
type GitSha1Error struct {
	GitErrContext
}

// Error renders the git invocation context followed by a note showing what
// git printed instead of a valid sha1.
func (e *GitSha1Error) Error() string {
	context := e.GitErrContext.Error()
	return context + fmt.Sprintf("expected valid sha1 (got %q)\n", e.stdout)
}
// xgit2Sha1 is xgitSha1() with arguments already split into argv and ctx.
//
// It raises *GitError if git exited with error, and *GitSha1Error if git
// succeeded but its stdout is not a valid sha1.
func xgit2Sha1(argv []string, ctx RunWith) Sha1 {
	gerr, out, errout := git2(argv, ctx)
	if gerr != nil {
		raise(gerr)
	}

	sha1, perr := Sha1Parse(out)
	if perr != nil {
		raise(&GitSha1Error{GitErrContext{argv: argv, stdin: ctx.stdin, stdout: out, stderr: errout}})
	}
	return sha1
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Git object: Blob Tree Commit Tag
package main
import (
"errors"
"fmt"
"strings"
)
// Commit is a git commit object decoded by commit_parse().
type Commit struct {
tree Sha1 // sha1 from the leading "tree" header line
parentv []Sha1 // sha1s from the "parent" header lines, in order of appearance
msg string // everything after the first empty line of the raw commit
}
// Tag is a git tag object decoded by tag_parse().
// Only the "object" and "type" header lines are decoded.
type Tag struct {
tagged_type string // value of the "type" header line
tagged_sha1 Sha1 // value of the "object" header line
// TODO msg
}
// TODO Tree (if/when needed)
// TODO Blob (if/when needed)
// load/parse Commit
// extract .tree .parent[] and .msg
//
// unfortunately `git show --format=%B` adds newline and optionally wants to
// reencode commit message and otherwise heavily rely on rev-list traversal
// machinery -> so we decode commit by hand in a plumbing way.
//
// Both the decoded commit and its raw text are returned.
// On any problem *CommitLoadError is raised.
func xload_commit(commit_sha1 Sha1) (commit *Commit, commit_raw string) {
	gerr, raw, _ := git("cat-file", "commit", commit_sha1, RunWith{raw: true})
	if gerr != nil {
		raise(&CommitLoadError{commit_sha1, gerr})
	}

	parsed, err := commit_parse(raw)
	if err != nil {
		raise(&CommitLoadError{commit_sha1, err})
	}

	return parsed, raw
}
// CommitLoadError is raised by xload_commit() when a commit cannot be read
// from git or cannot be parsed; err holds the cause.
type CommitLoadError struct {
	commit_sha1 Sha1
	err         error
}

func (e *CommitLoadError) Error() string {
	sha1, cause := e.commit_sha1, e.err
	return fmt.Sprintf("commit %s: %s", sha1, cause)
}
// commit_parse decodes raw commit text as emitted by `git cat-file commit`.
//
// The text is split at the first empty line into header and message. From
// the header the leading "tree" line and the "parent" lines that directly
// follow it are decoded; remaining header lines are not looked at.
func commit_parse(commit_raw string) (*Commit, error) {
	head, msg, err := headtail(commit_raw, "\n\n")
	if err != nil {
		return nil, errors.New("cannot split to head & msg")
	}

	lines := strings.Split(head, "\n")
	if len(lines) == 0 {
		return nil, errors.New("empty header")
	}

	commit := &Commit{msg: msg}
	if _, err := fmt.Sscanf(lines[0], "tree %s\n", &commit.tree); err != nil {
		return nil, errors.New("bad tree entry")
	}

	for _, line := range lines[1:] {
		// parents come right after tree - stop at first other header
		if !strings.HasPrefix(line, "parent ") {
			break
		}

		var parent Sha1
		if _, err := fmt.Sscanf(line, "parent %s\n", &parent); err != nil {
			return nil, errors.New("bad parent entry")
		}
		commit.parentv = append(commit.parentv, parent)
	}

	return commit, nil
}
// load/parse Tag
//
// Both the decoded tag and its raw text (as emitted by `git cat-file tag`)
// are returned. On any problem *TagLoadError is raised.
func xload_tag(tag_sha1 Sha1) (tag *Tag, tag_raw string) {
	gerr, raw, _ := git("cat-file", "tag", tag_sha1, RunWith{raw: true})
	if gerr != nil {
		raise(&TagLoadError{tag_sha1, gerr})
	}

	parsed, err := tag_parse(raw)
	if err != nil {
		raise(&TagLoadError{tag_sha1, err})
	}

	return parsed, raw
}
// TagLoadError is raised by xload_tag() when a tag cannot be read from git
// or cannot be parsed; err holds the cause.
type TagLoadError struct {
	tag_sha1 Sha1
	err      error
}

func (e *TagLoadError) Error() string {
	sha1, cause := e.tag_sha1, e.err
	return fmt.Sprintf("tag %s: %s", sha1, cause)
}
// tag_parse decodes the leading "object" and "type" header lines of raw tag
// text. Anything after these two lines is not looked at.
func tag_parse(tag_raw string) (*Tag, error) {
	tag := new(Tag)
	if _, err := fmt.Sscanf(tag_raw, "object %s\ntype %s\n", &tag.tagged_sha1, &tag.tagged_type); err != nil {
		return nil, errors.New("invalid header")
	}
	return tag, nil
}
// parse lstree entry
//
// An entry has the form
//
//	<mode> SP <type> SP <object> TAB <file>
//
// where mode is octal and file can contain spaces.
// On malformed entry zero values and *InvalidLstreeEntry are returned.
func parse_lstree_entry(lsentry string) (mode uint32, type_ string, sha1 Sha1, filename string, err error) {
	// split at first TAB: everything after it is the file name
	meta, name, errSplit := headtail(lsentry, "\t")
	_, errScan := fmt.Sscanf(meta, "%o %s %s\n", &mode, &type_, &sha1)

	if errSplit == nil && errScan == nil {
		// parsed ok
		return mode, type_, sha1, name, nil
	}

	return 0, "", Sha1{}, "", &InvalidLstreeEntry{lsentry}
}
// InvalidLstreeEntry is the error for an ls-tree line that could not be
// parsed; lsentry is the offending line.
type InvalidLstreeEntry struct {
	lsentry string
}

func (e *InvalidLstreeEntry) Error() string {
	entry := e.lsentry
	return fmt.Sprintf("invalid ls-tree entry %q", entry)
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file (in go.git repository).
package main
import (
"flag"
"fmt"
"strconv"
)
// flag that is both bool and int - for e.g. handling -v -v -v ...
// inspired/copied by/from cmd.dist.count in go.git
type countFlag int

// String returns the current count in decimal.
func (c *countFlag) String() string {
	return strconv.Itoa(int(*c))
}

// Set updates the count from flag value s:
//
//	"true"   -> count is incremented (this is what a bare -v passes)
//	"false"  -> count is reset to 0
//	<number> -> count is set to that number
//
// Anything else is rejected with an error and the count is left unchanged.
func (c *countFlag) Set(s string) error {
	if s == "true" {
		*c++
		return nil
	}
	if s == "false" {
		*c = 0
		return nil
	}

	n, err := strconv.Atoi(s)
	if err != nil {
		return fmt.Errorf("invalid count %q", s)
	}
	*c = countFlag(n)
	return nil
}

// flag.boolFlag
//
// Reporting true lets the flag be given without a value.
func (c *countFlag) IsBoolFlag() bool {
	return true
}

// flag.Value
var _ flag.Value = (*countFlag)(nil)
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Set "template" type
// TODO -> go:generate + template
package main
// Set<Sha1>
type Sha1Set map[Sha1]struct{}

// Add puts v into the set.
func (s Sha1Set) Add(v Sha1) {
	s[v] = struct{}{}
}

// Contains tells whether v is in the set.
func (s Sha1Set) Contains(v Sha1) bool {
	_, present := s[v]
	return present
}

// all elements of set as slice
//
// The order of elements is the (unspecified) map iteration order.
func (s Sha1Set) Elements() []Sha1 {
	ev := make([]Sha1, 0, len(s))
	for e := range s {
		ev = append(ev, e)
	}
	return ev
}
// Set<string>
type StrSet map[string]struct{}

// Add puts v into the set.
func (s StrSet) Add(v string) {
	s[v] = struct{}{}
}

// Contains tells whether v is in the set.
func (s StrSet) Contains(v string) bool {
	_, present := s[v]
	return present
}

// all elements of set as slice
//
// The order of elements is the (unspecified) map iteration order.
func (s StrSet) Elements() []string {
	ev := make([]string, 0, len(s))
	for e := range s {
		ev = append(ev, e)
	}
	return ev
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Sha1 type to work with SHA1 oids
package main
import (
"bytes"
"encoding/hex"
"fmt"
)
const SHA1_RAWSIZE = 20

// SHA1 value in raw form
// NOTE zero value of Sha1{} is NULL sha1
// NOTE Sha1 size is 20 bytes. On amd64
// - string size = 16 bytes
// - slice size = 24 bytes
// -> so it is reasonable to pass Sha1 not by reference
type Sha1 struct {
	sha1 [SHA1_RAWSIZE]byte
}

// fmt.Stringer
var _ fmt.Stringer = Sha1{}

// String returns the sha1 as lowercase hex text.
func (sha1 Sha1) String() string {
	raw := sha1.sha1
	return hex.EncodeToString(raw[:])
}

// Sha1Parse decodes hex text into Sha1.
// On error the NULL sha1 is returned together with the error.
func Sha1Parse(sha1str string) (Sha1, error) {
	if hex.DecodedLen(len(sha1str)) != SHA1_RAWSIZE {
		return Sha1{}, fmt.Errorf("sha1parse: %q invalid", sha1str)
	}

	var parsed Sha1
	if _, err := hex.Decode(parsed.sha1[:], Bytes(sha1str)); err != nil {
		return Sha1{}, fmt.Errorf("sha1parse: %q invalid: %s", sha1str, err)
	}
	return parsed, nil
}

// fmt.Scanner
var _ fmt.Scanner = (*Sha1)(nil)

// Scan lets Sha1 be read with %s and %v verbs of fmt scanning functions:
// the next space-delimited token is taken and parsed with Sha1Parse.
func (sha1 *Sha1) Scan(s fmt.ScanState, ch rune) error {
	if ch != 's' && ch != 'v' {
		return fmt.Errorf("Sha1.Scan: invalid verb %q", ch)
	}

	tok, err := s.Token(true, nil)
	if err != nil {
		return err
	}

	*sha1, err = Sha1Parse(String(tok))
	return err
}

// check whether sha1 is null
func (sha1 *Sha1) IsNull() bool {
	var null Sha1
	return *sha1 == null
}
// for sorting by Sha1
//
// BySha1 implements sort.Interface ordering elements by their raw bytes.
type BySha1 []Sha1

func (p BySha1) Len() int {
	return len(p)
}

func (p BySha1) Swap(i, j int) {
	p[i], p[j] = p[j], p[i]
}

func (p BySha1) Less(i, j int) bool {
	a, b := p[i].sha1[:], p[j].sha1[:]
	return bytes.Compare(a, b) < 0
}
[core]
repositoryformatversion = 0
filemode = true
bare = true
Unnamed repository; edit this file 'description' to name the repository.
# git ls-files --others --exclude-from=.git/info/exclude
# Lines that start with '#' are comments.
# For a project mostly in C, the following would be a good set of
# exclude patterns (uncomment them if you want to use them):
# *.[oa]
# *~
[core]
repositoryformatversion = 0
filemode = true
bare = true
Unnamed repository; edit this file 'description' to name the repository.
# git ls-files --others --exclude-from=.git/info/exclude
# Lines that start with '#' are comments.
# For a project mostly in C, the following would be a good set of
# exclude patterns (uncomment them if you want to use them):
# *.[oa]
# *~
x;
B1@QbzA&" |l/ncҫb" shKHZĐ\d-:M({='íq6?pE_*縀49F8F{ݏr|N[e8
\ No newline at end of file
ref: refs/heads/master
[core]
repositoryformatversion = 0
filemode = true
bare = true
Unnamed repository; edit this file 'description' to name the repository.
# git ls-files --others --exclude-from=.git/info/exclude
# Lines that start with '#' are comments.
# For a project mostly in C, the following would be a good set of
# exclude patterns (uncomment them if you want to use them):
# *.[oa]
# *~
xA
0E]ZL24 ޥ'h)6(,<=䜚`2K#Zyvcqz",F+Mzb2ґa0,`Rx?!Fc=JTӶ{m-oy^Sם?)|֎В<*?lw;_+a_C
\ No newline at end of file
xA
0@Q9E$3I "]zdfۅp5& mD9"`&&$@=cD @eNcfRk<[QZ,.ZYvێj=HD
\ No newline at end of file
x5
0ayKeҀ{> %6m$ѷ7P\ſN{ >zrh@IoDMʑtFEd}fb\H. -Ք3WKyyI^7pLPK֨I9G.]n-
\ No newline at end of file
xA
0@Q9Ed&&)w CVBo?m C"z
G3-SLFA@
MG'2vaS8&2'^U}5/YsG8V.8NP{m= N} ^DX
\ No newline at end of file
x+)JMU06g040031QH+(a/`E۪{E 2<,
\ No newline at end of file
x-K!]sޛ14 .=2 abK6O#nJC4Rqb@#
tF8/e40Y0jX Ol݇C_58KBeϹ㍞4]aZ +-_~o7,
\ No newline at end of file
xͱ
0a<\BG\14 D3񳖒؞@,3Wqa8:AO ݞZjET%_Z`=Qz{8C4{ݏM*9+<h~,97
\ No newline at end of file
xKj1Pu`V}>#QdG ,xEZ&
:eXmb0L gl萂9mYB>/٥D޳9귭Wie]Zeoo>#\y0qcvZю]mi,YC/=F
\ No newline at end of file
# pack-refs with: peeled fully-peeled
647e137fd3b31939b36889eba854a298ef97b6ff refs/heads/branch2
feeed96ca75fcf8dcf183008f61dbf72e91ab4de refs/heads/master
f735011c9fcece41219729a33f7876cd8791f659 refs/tags/tag-to-commit
^354caa307c647cadd3a9c3bffaaa1a72c1ea1dac
7124713e403925bc772cd252b0dec099f3ced9c5 refs/tags/tag-to-tag
^354caa307c647cadd3a9c3bffaaa1a72c1ea1dac
ba899e5639273a6fa4d50d684af8db1ae070351e refs/tags/tag-to-tree
^e14b1cb9ad4e5120be959593996b777573f7432c
7a3343f584218e973165d943d7c0af47a52ca477 refs/test/ref-to-blob
11e67095628aa17b03436850e690faea3006c25d
61882eb85774ed4401681d800bb9c638031375e2
Hello World!
#!/bin/sh
exec echo "Hello"
dir
\ No newline at end of file
file with space + α
\ No newline at end of file
bbb
\ No newline at end of file
ref: refs/heads/master
[core]
repositoryformatversion = 0
filemode = true
bare = true
Unnamed repository; edit this file 'description' to name the repository.
# git ls-files --others --exclude-from=.git/info/exclude
# Lines that start with '#' are comments.
# For a project mostly in C, the following would be a good set of
# exclude patterns (uncomment them if you want to use them):
# *.[oa]
# *~
xK
0a9<:mAĝ M&thb ZtГ; fA=%2 HuG qD0`͋wi3<
签Ik'9)rHj8ET{ݏVk$#}W?G;|
\ No newline at end of file
#!/usr/bin/env python
# make corruption to c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4 loose object
import os, zlib
def readfile(path):
    """Return the whole content of file at `path`.

    The file is opened in binary mode: the data handled by this script is
    zlib-compressed and must be read back byte-exact.
    """
    with open(path, 'rb') as f:
        return f.read()
def writefile(path, data):
    """Write `data` to file at `path`, replacing the file if it exists.

    An existing file is unlinked first (a missing file is not an error), then
    the file is created anew. The file is opened in binary mode because the
    data written by this script is zlib-compressed.
    """
    try:
        os.unlink(path)
    except OSError:
        pass
    with open(path, 'wb') as f:
        f.write(data)
# Script body (Python 2 syntax): every intermediate value is printed via repr.
# read the saved original copy of the loose object (zlib-compressed)
z = readfile("c6c31ba413a4588cac7f77919bfcbe4adbf1d3b4.orig")
print `z`
# decompress it to get the raw object content
d = zlib.decompress(z)
print `d`
# corrupt the content: the object no longer hashes to its name
D = d.replace('good', 'BAAD')
print `D`
# compress it back and store it under the original object name
Z = zlib.compress(D)
print `Z`
writefile("c6/c31ba413a4588cac7f77919bfcbe4adbf1d3b4", Z)
28c06ba333326af5266297b8aa21051f294f298d
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// Git-backup | Miscellaneous utilities
package main
import (
"encoding/hex"
"fmt"
"os"
"reflect"
"strings"
"syscall"
"unicode"
"unicode/utf8"
"unsafe"
)
// string -> []byte without copying
//
// The returned slice shares memory with s; its len and cap are both len(s).
// NOTE(review): since strings are immutable, callers presumably must not
// write through the result.
func Bytes(s string) []byte {
	var b []byte
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data = src.Data
	dst.Cap = len(s)
	dst.Len = len(s)
	return b
}
// []byte -> string without copying
//
// The returned string shares memory with b: later changes to b are visible
// through the string.
func String(b []byte) string {
	var s string
	src := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst.Data = src.Data
	dst.Len = len(b)
	return s
}
// split string by sep and expect exactly 2 parts
//
// If s does not consist of exactly 2 sep-separated parts, empty strings and
// an error are returned.
func split2(s, sep string) (s1, s2 string, err error) {
	fields := strings.Split(s, sep)
	if n := len(fields); n != 2 {
		return "", "", fmt.Errorf("split2: %q has %v parts (expected 2, sep: %q)", s, n, sep)
	}
	s1, s2 = fields[0], fields[1]
	return s1, s2, nil
}
// (head+sep+tail) -> head, tail
//
// s is split at the first occurrence of sep; tail may itself contain sep.
// If s has no sep, empty strings and an error are returned.
func headtail(s, sep string) (head, tail string, err error) {
	if parts := strings.SplitN(s, sep, 2); len(parts) == 2 {
		return parts[0], parts[1], nil
	}
	return "", "", fmt.Errorf("headtail: %q has no %q", s, sep)
}
// strip_prefix("/a/b", "/a/b/c/d/e") -> "c/d/e" (without leading /)
// path must start with prefix
//
// If path does not start with prefix, strip_prefix panics with an error.
func strip_prefix(prefix, path string) string {
	if !strings.HasPrefix(path, prefix) {
		panic(fmt.Errorf("strip_prefix: %q has no prefix %q", path, prefix))
	}
	// drop the prefix and every / that directly follows it
	return strings.TrimLeft(path[len(prefix):], "/")
}
// reprefix("/a", "/b", "/a/str") -> "/b/str"
// path must start with prefix_from
//
// The prefix is removed with strip_prefix() and the rest is attached to
// prefix_to with a single /.
func reprefix(prefix_from, prefix_to, path string) string {
	rest := strip_prefix(prefix_from, path)
	return prefix_to + "/" + rest
}
// like ioutil.WriteFile() but takes native mode/perm
//
// The file is created if missing and truncated otherwise. perm is passed to
// open(2) as is. If both writing and closing fail, the write error is
// returned.
func writefile(path string, data []byte, perm uint32) error {
	const flags = syscall.O_WRONLY | syscall.O_CREAT | syscall.O_TRUNC

	fd, err := syscall.Open(path, flags, perm)
	if err != nil {
		return &os.PathError{Op: "open", Path: path, Err: err}
	}

	f := os.NewFile(uintptr(fd), path)
	_, werr := f.Write(data)
	cerr := f.Close()
	if werr != nil {
		return werr
	}
	return cerr
}
// escape path so that git is happy to use it as ref
// https://git.kernel.org/cgit/git/git.git/tree/refs.c?h=v2.9.0-37-g6d523a3#n34
// XXX very suboptimal
//
// The path is processed per /-separated component:
//
//   - characters for which shouldEscape() is true are %-encoded;
//   - a run of N dots becomes N-1 encoded dots followed by a literal ".",
//     so ".." never appears and a trailing ".git" stays ".git";
//   - a leading "." of a component is encoded;
//   - the "." of a trailing ".lock" is encoded.
//
// Trailing empty components (i.e. trailing /) are dropped.
func path_refescape(path string) string {
	components := strings.Split(path, "/")
	escapedv := make([]string, 0, len(components))

	for _, component := range components {
		escaped := ""
		ndots := 0 // number of seen consecutive dots not yet emitted

		// emit pending dots run as %2E%2E... with trailing "."
		flushDots := func() {
			if ndots > 0 {
				escaped += strings.Repeat(escape("."), ndots-1) + "."
				ndots = 0
			}
		}

		for rest := component; rest != ""; {
			r, size := utf8.DecodeRuneInString(rest)
			chunk := rest[:size]
			rest = rest[size:]

			if r == '.' {
				ndots++
				continue
			}

			flushDots()
			if shouldEscape(r) {
				chunk = escape(chunk)
			}
			escaped += chunk
		}

		// handle trailing dots
		flushDots()

		// ^. not allowed
		if strings.HasPrefix(escaped, ".") {
			escaped = escape(".") + escaped[1:]
		}

		// .lock$ not allowed
		if strings.HasSuffix(escaped, ".lock") {
			escaped = strings.TrimSuffix(escaped, ".lock") + escape(".") + "lock"
		}

		escapedv = append(escapedv, escaped)
	}

	// strip trailing /
	for n := len(escapedv); n > 0 && escapedv[n-1] == ""; n-- {
		escapedv = escapedv[:n-1]
	}

	return strings.Join(escapedv, "/")
}
// shouldEscape tells whether rune r has to be %-encoded by path_refescape():
// whitespace, control characters, the characters : ? [ \ ^ ~ * @ % and
// utf8.RuneError.
func shouldEscape(r rune) bool {
	// NOTE RuneError is for always escaping non-valid UTF-8
	if r == utf8.RuneError {
		return true
	}

	if unicode.IsControl(r) || unicode.IsSpace(r) {
		return true
	}

	for _, special := range `:?[\^~*@%` {
		if r == special {
			return true
		}
	}
	return false
}
// escape %-encodes every byte of s as %XX with uppercase hex digits.
func escape(s string) string {
	out := make([]byte, 0, 3*len(s))
	for _, b := range []byte(s) {
		out = append(out, fmt.Sprintf("%%%02X", b)...)
	}
	return string(out)
}
// unescape path encoded by path_refescape()
// decoding is permissive - any byte can be %-encoded, not only special cases
// XXX very suboptimal
//
// A % that is not followed by two hex digits makes the whole input invalid:
// "" and EscapeError are returned.
func path_refunescape(s string) (string, error) {
	out := make([]byte, 0, len(s))

	for i := 0; i < len(s); {
		c := s[i]
		if c != '%' {
			out = append(out, c)
			i++
			continue
		}

		// % must be followed by 2 more bytes
		if len(s)-i < 3 {
			return "", EscapeError(s)
		}
		b, err := hex.DecodeString(s[i+1 : i+3])
		if err != nil {
			return "", EscapeError(s)
		}

		out = append(out, b[0])
		i += 3
	}

	return String(out), nil
}
// EscapeError is returned by path_refunescape() for invalid input;
// its value is the whole input string.
type EscapeError string

func (e EscapeError) Error() string {
	input := string(e)
	return fmt.Sprintf("%q: invalid escape format", input)
}
// Copyright (C) 2015-2016 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
package main
import (
"reflect"
"strings"
"testing"
)
// check that String() and Bytes() create correct objects which alias original object memory
func TestStringBytes(t *testing.T) {
	s := "Hello"
	b := []byte(s) // regular conversion - makes a copy
	s1 := String(b)
	b1 := Bytes(s1)

	// round trips give back equal data
	if s1 != s {
		t.Error("string -> []byte -> String != Identity")
	}
	if !reflect.DeepEqual(b1, b) {
		t.Error("[]byte -> String -> Bytes != Identity")
	}

	// change through b must be visible via s1 and b1, but not via s
	b[0] = 'I'
	if s != "Hello" {
		t.Error("string -> []byte not copied")
	}
	if s1 != "Iello" {
		t.Error("[]byte -> String not aliased")
	}
	if !reflect.DeepEqual(b1, b) {
		t.Error("string -> Bytes not aliased")
	}
}
// TestSplit2 checks split2() with " " as separator: exactly two parts are
// accepted, anything else must give an error and empty results.
func TestSplit2(t *testing.T) {
	type testcase struct {
		input, s1, s2 string
		ok            bool
	}
	cases := []testcase{
		{"", "", "", false},
		{" ", "", "", true},
		{"hello", "", "", false},
		{"hello world", "hello", "world", true},
		{"hello world 1", "", "", false},
	}

	for i := range cases {
		tc := &cases[i]
		s1, s2, err := split2(tc.input, " ")
		ok := err == nil
		if s1 == tc.s1 && s2 == tc.s2 && ok == tc.ok {
			continue
		}
		t.Errorf("split2(%q) -> %q %q %v ; want %q %q %v", tc.input, s1, s2, ok, tc.s1, tc.s2, tc.ok)
	}
}
// TestHeadtail checks headtail() with " " as separator: the input is split
// at the first separator only, so tail keeps any further separators.
func TestHeadtail(t *testing.T) {
	var tests = []struct {
		input, head, tail string
		ok                bool
	}{
		{"", "", "", false},
		{" ", "", "", true},
		// NOTE two spaces in input: the second one must stay in tail.
		// (with a single space the expected tail " " could never be produced)
		{"  ", "", " ", true},
		{"hello world", "hello", "world", true},
		{"hello world 1", "hello", "world 1", true},
		// NOTE two spaces after hello: the second one must stay in tail.
		{"hello  world 2", "hello", " world 2", true},
	}

	for _, tt := range tests {
		head, tail, err := headtail(tt.input, " ")
		ok := err == nil
		if head != tt.head || tail != tt.tail || ok != tt.ok {
			t.Errorf("headtail(%q) -> %q %q %v ; want %q %q %v", tt.input, head, tail, ok, tt.head, tt.tail, tt.ok)
		}
	}
}
// TestPathEscapeUnescape checks path_refescape() against the expected
// canonical escaping, and path_refunescape() against the canonical and any
// additional non-canonical escapings of the same path.
//
// Since path_refescape() drops trailing /, decoding is expected to give the
// path without trailing /.
func TestPathEscapeUnescape(t *testing.T) {
	type TestEntry struct {
		path     string
		escapedv []string // [0] is canonical escaping, the rest are non-canonical
	}
	te := func(path string, escaped ...string) TestEntry {
		return TestEntry{path, escaped}
	}
	var tests = []TestEntry{
		// path escaped non-canonical escapes
		te("hello/world", "hello/world", "%68%65%6c%6c%6f%2f%77%6f%72%6c%64"),
		te("hello/мир", "hello/мир"),
		te("hello/ мир", "hello/%20мир"),
		te("hel%lo/мир", "hel%25lo/мир"),
		te(".hello/.world", "%2Ehello/%2Eworld"),
		te("..hello/world.loc", "%2E.hello/world.loc"),
		te("..hello/world.lock", "%2E.hello/world%2Elock"),
		// leading /
		te("/hello/world", "/hello/world"),
		te("//hello///world", "//hello///world"),
		// trailing /
		te("/hello/world/", "/hello/world"),
		te("/hello/world//", "/hello/world"),
		// trailing ...
		te("/hello/world.", "/hello/world."),
		te("/hello/world..", "/hello/world%2E."),
		te("/hello/world...", "/hello/world%2E%2E."),
		te("/hello/world...git", "/hello/world%2E%2E.git"),
		// .. anywhere
		te("/hello/./world", "/hello/%2E/world"),
		te("/hello/.a/world", "/hello/%2Ea/world"),
		te("/hello/a./world", "/hello/a./world"),
		te("/hello/../world", "/hello/%2E./world"),
		te("/hello/a..b/world", "/hello/a%2E.b/world"),
		te("/hello/a.c.b/world", "/hello/a.c.b/world"),
		te("/hello/a.c..b/world", "/hello/a.c%2E.b/world"),
		// special & control characters
		te("/hel lo/wor\tld/a:?[\\^~*@%b/\001\004\n\xc2\xa0", "/hel%20lo/wor%09ld/a%3A%3F%5B%5C%5E%7E%2A%40%25b/%01%04%0A%C2%A0"),
		// utf8 error
		te("a\xc5z", "a%C5z"),
	}

	for _, tt := range tests {
		escaped := path_refescape(tt.path)
		if escaped != tt.escapedv[0] {
			t.Errorf("path_refescape(%q) -> %q ; want %q", tt.path, escaped, tt.escapedv[0])
		}

		// also check the decoding
		pathok := strings.TrimRight(tt.path, "/")
		for _, escaped := range tt.escapedv {
			unescaped, err := path_refunescape(escaped)
			if unescaped != pathok || err != nil {
				// report what is really expected - pathok, not tt.path
				t.Errorf("path_refunescape(%q) -> %q %v ; want %q nil", escaped, unescaped, err, pathok)
			}
		}
	}
}
// TestPathUnescapeErr checks that path_refunescape() rejects malformed
// %-escapes: it must return "" together with an error.
func TestPathUnescapeErr(t *testing.T) {
	malformed := []string{
		"%",
		"%2",
		"%2q",
		"hell%2q/world",
	}

	for _, escaped := range malformed {
		unescaped, err := path_refunescape(escaped)
		if err != nil && unescaped == "" {
			continue
		}
		t.Errorf("path_refunescape(%q) -> %q %v ; want \"\" err", escaped, unescaped, err)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment