Commit 334282c3 authored by Nicolas Wavrant, committed by GitHub

Merge pull request #235 from Sebatyne/verify-on-recovery

Repozo : add an option to verify on recovery
parents b2895c09 1c6a9828
......@@ -5,7 +5,10 @@
5.5.2 (unreleased)
==================
- TBD
- Make repozo's recover mode atomic by recovering the backup in a
temporary file which is then moved to the expected output file.
- Add a new option to repozo's recover mode that verifies the
integrity of the backups on the fly.
5.5.1 (2018-10-25)
==================
......
......@@ -73,6 +73,13 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored
automatically.
-w
--with-verification
Verify the backup files on the fly while recovering. This option runs
the same checks as when repozo is run in -V/--verify mode, and
allows verifying and recovering a backup in a single step. If a sanity
check fails, the partially recovered ZODB will be left in place.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
......@@ -101,11 +108,19 @@ READCHUNK = 16 * 1024
VERBOSE = False
class WouldOverwriteFiles(Exception):
class RepozoError(Exception):
pass
class WouldOverwriteFiles(RepozoError):
pass
class NoFiles(Exception):
class NoFiles(RepozoError):
pass
class VerificationFail(RepozoError):
pass
......@@ -146,7 +161,7 @@ def error(msg, *args):
def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
['backup',
'recover',
'verify',
......@@ -160,6 +175,7 @@ def parseargs(argv):
'kill-old-on-full',
'date=',
'output=',
'with-verification',
])
except getopt.error as msg:
usage(1, msg)
......@@ -174,6 +190,7 @@ def parseargs(argv):
quick = False # -Q flag state
gzip = False # -z flag state
killold = False # -k flag state
withverify = False # -w flag state
options = Options()
......@@ -210,6 +227,8 @@ def parseargs(argv):
options.gzip = True
elif opt in ('-k', '--kill-old-on-full'):
options.killold = True
elif opt in ('-w', '--with-verify'):
options.withverify = True
else:
assert False, (opt, arg)
......@@ -229,6 +248,9 @@ def parseargs(argv):
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
if options.withverify is not None:
log('--with-verify option is ignored in backup mode')
options.withverify = None
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
......@@ -256,6 +278,9 @@ def parseargs(argv):
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
if options.withverify is not None:
log('--with-verify option is ignored in verify mode')
options.withverify = None
return options
......@@ -360,8 +385,6 @@ def concat(files, ofp=None):
ifp = open(f, 'rb')
bytesread += dofile(func, ifp)
ifp.close()
if ofp:
ofp.close()
return bytesread, sum.hexdigest()
......@@ -649,11 +672,45 @@ def do_recover(options):
log('Recovering file to stdout')
outfp = sys.stdout
else:
# Delete the old ZODB before recovering the backup, as the size of the
# old ZODB plus the full partial file may exceed the free disk space
if os.path.exists(options.output):
log('Deleting old %s', options.output)
os.unlink(options.output)
log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb')
temporary_output_file = options.output + '.part'
outfp = open(temporary_output_file, 'wb')
if options.withverify:
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
truth_dict = {}
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
truth_dict[filename] = {
'size': endpos - startpos,
'sum': sum,
}
totalsz = 0
for repofile in repofiles:
reposz, reposum = concat([repofile], outfp)
expected_truth = truth_dict[repofile]
if reposz != expected_truth['size']:
raise VerificationFail(
"%s is %d bytes, should be %d bytes" % (
repofile, reposz, expected_truth['size']))
if reposum != expected_truth['sum']:
raise VerificationFail(
"%s has checksum %s instead of %s" % (
repofile, reposum, expected_truth['sum']))
totalsz += reposz
log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum)
log("Recovered a total of %s bytes", totalsz)
else:
reposz, reposum = concat(repofiles, outfp)
if outfp != sys.stdout:
outfp.close()
log('Recovered %s bytes, md5: %s', reposz, reposum)
if options.output is not None:
......@@ -666,6 +723,15 @@ def do_recover(options):
else:
log('No index file to restore: %s', source_index)
if outfp != sys.stdout:
outfp.close()
try:
os.rename(temporary_output_file, options.output)
except OSError:
log("ZODB has been fully recovered as %s, but it cannot be renamed into : %s",
temporary_output_file, options.output)
raise
def do_verify(options):
# Verify the sizes and checksums of all files mentioned in the .dat file
......@@ -725,21 +791,15 @@ def main(argv=None):
if argv is None:
argv = sys.argv[1:]
options = parseargs(argv)
if options.mode == BACKUP:
try:
if options.mode == BACKUP:
do_backup(options)
except WouldOverwriteFiles as e:
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options)
except NoFiles as e:
sys.exit(str(e))
else:
assert options.mode == VERIFY
try:
do_verify(options)
except NoFiles as e:
except (RepozoError, OSError) as e:
sys.exit(str(e))
......
......@@ -371,7 +371,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
from ZODB.scripts.repozo import _GzipCloser
import tempfile
if self._repository_directory is None:
self._repository_directory = tempfile.mkdtemp()
self._repository_directory = tempfile.mkdtemp(prefix='zodb-test-')
fqn = os.path.join(self._repository_directory, name)
if gzip_file:
_opener = _GzipCloser
......@@ -414,7 +414,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
ofp = Faux()
bytes, sum = self._callFUT(files, ofp)
self.assertEqual(ofp._written, [x.encode() for x in 'ABC'])
self.assertTrue(ofp._closed)
self.assertFalse(ofp._closed)
_marker = object()
class Test_gen_filename(OptionsTestBase, unittest.TestCase):
......@@ -674,7 +674,7 @@ class Test_do_full_backup(OptionsTestBase, unittest.TestCase):
def _makeDB(self):
import tempfile
datadir = self._data_directory = tempfile.mkdtemp()
datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory)
def test_dont_overwrite_existing_file(self):
......@@ -729,7 +729,7 @@ class Test_do_incremental_backup(OptionsTestBase, unittest.TestCase):
def _makeDB(self):
import tempfile
datadir = self._data_directory = tempfile.mkdtemp()
datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory)
def test_dont_overwrite_existing_file(self):
......@@ -868,11 +868,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_full_backup_latest_no_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._callFUT(options)
......@@ -880,11 +881,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_full_backup_latest_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
......@@ -894,11 +896,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_incr_backup_latest_no_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._callFUT(options)
......@@ -906,11 +909,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_incr_backup_latest_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
......@@ -918,6 +922,57 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')
def test_w_incr_backup_with_verify_all_is_fine(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self._callFUT(options)
self.assertFalse(os.path.exists(output + '.part'))
self.assertEqual(_read_file(output), b'AAABBBB')
def test_w_incr_backup_with_verify_sum_inconsistent(self):
import tempfile
from ZODB.scripts.repozo import VerificationFail
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n')
self.assertRaises(VerificationFail, self._callFUT, options)
self.assertTrue(os.path.exists(output + '.part'))
def test_w_incr_backup_with_verify_size_inconsistent(self):
import tempfile
from ZODB.scripts.repozo import VerificationFail
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertRaises(VerificationFail, self._callFUT, options)
self.assertTrue(os.path.exists(output + '.part'))
class Test_do_verify(OptionsTestBase, unittest.TestCase):
......@@ -1069,7 +1124,7 @@ class MonteCarloTests(unittest.TestCase):
def setUp(self):
# compute directory names
import tempfile
self.basedir = tempfile.mkdtemp()
self.basedir = tempfile.mkdtemp(prefix='zodb-test-')
self.backupdir = os.path.join(self.basedir, 'backup')
self.datadir = os.path.join(self.basedir, 'data')
self.restoredir = os.path.join(self.basedir, 'restore')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment