Commit af51e3ae authored by Tim Peters

find_files(): When trying to do recovery to a time earlier than that
of the most recent full backup, repozo.py failed to find the appropriate
files, erroneously claiming

    No files in repository before <specified time>

Repaired that.  Also made it much more robust against "junk files" the
user may create, or leave behind, in the backup directory.  Added a test.

Parent commit: fa76112e
...@@ -5,6 +5,11 @@ Release date: DD-MMM-2004 ...@@ -5,6 +5,11 @@ Release date: DD-MMM-2004
ZODB ZODB
---- ----
When trying to do recovery to a time earlier than that of the most recent
full backup, repozo.py failed to find the appropriate files, erroneously
claiming "No files in repository before <specified time>". This has
been repaired.
Collector #1330: repozo.py -R can create corrupt .fs. Collector #1330: repozo.py -R can create corrupt .fs.
When looking for the backup files needed to recreate a Data.fs file, When looking for the backup files needed to recreate a Data.fs file,
repozo could (unintentionally) include its meta .dat files in the list, repozo could (unintentionally) include its meta .dat files in the list,
......
...@@ -53,7 +53,7 @@ Options for -R/--recover: ...@@ -53,7 +53,7 @@ Options for -R/--recover:
-D str -D str
--date=str --date=str
Recover state as of this date. str is in the format Recover state as of this date. str is in the format
yyyy-mm-dd[-hh[-mm]] yyyy-mm-dd[-hh[-mm[-ss]]]
By default, current time is used. By default, current time is used.
-o filename -o filename
...@@ -262,30 +262,32 @@ def gen_filename(options, ext=None): ...@@ -262,30 +262,32 @@ def gen_filename(options, ext=None):
t = time.gmtime()[:6] + (ext,) t = time.gmtime()[:6] + (ext,)
return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
# Return a list of files needed to reproduce state at time options.date. # Return a list of files needed to reproduce state at time options.date.
# This is a list, in chronological order, of the .fs[z] and .deltafs[z] # This is a list, in chronological order, of the .fs[z] and .deltafs[z]
# files, from the time of the most recent full backup preceding # files, from the time of the most recent full backup preceding
# options.date, up to options.date. # options.date, up to options.date.
import re
is_data_file = re.compile(r'\d{4}(?:-\d\d){5}\.(?:delta)?fsz?$').match
del re
def find_files(options): def find_files(options):
def rootcmp(x, y):
# This already compares in reverse order
return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0])
when = options.date when = options.date
if not when: if not when:
when = gen_filename(options, '') when = gen_filename(options, '')
log('looking for files between last full backup and %s...', when) log('looking for files between last full backup and %s...', when)
all = os.listdir(options.repository) all = filter(is_data_file, os.listdir(options.repository))
all.sort(rootcmp) all.sort()
all.reverse() # newest file first
# Find the last full backup before date, then include all the # Find the last full backup before date, then include all the
# incrementals between that full backup and "when". # incrementals between that full backup and "when".
needed = [] needed = []
for fname in all: for fname in all:
root, ext = os.path.splitext(fname) root, ext = os.path.splitext(fname)
if root <= when and ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'): if root <= when:
needed.append(fname) needed.append(fname)
if ext in ('.fs', '.fsz'): if ext in ('.fs', '.fsz'):
break break
# Make the file names relative to the repository directory # Make the file names relative to the repository directory
needed = [os.path.join(options.repository, f) for f in needed] needed = [os.path.join(options.repository, f) for f in needed]
# Restore back to chronological order # Restore back to chronological order
......
...@@ -26,6 +26,7 @@ import random ...@@ -26,6 +26,7 @@ import random
import time import time
import glob import glob
import sys import sys
import shutil
import ZODB import ZODB
from ZODB import FileStorage from ZODB import FileStorage
...@@ -68,34 +69,49 @@ class OurDB: ...@@ -68,34 +69,49 @@ class OurDB:
self.db.close() self.db.close()
self.db = None self.db = None
# Do recovery to current time, and check that it's identical to Data.fs. # Do recovery to time 'when', and check that it's identical to correctpath.
def check(): def check(correctpath='Data.fs', when=None):
os.system(PYTHON + '../repozo.py -vRr backup -o Copy.fs') if when is None:
f = file('Data.fs', 'rb') extra = ''
else:
extra = ' -D ' + when
cmd = PYTHON + '../repozo.py -vRr backup -o Copy.fs' + extra
os.system(cmd)
f = file(correctpath, 'rb')
g = file('Copy.fs', 'rb') g = file('Copy.fs', 'rb')
fguts = f.read() fguts = f.read()
gguts = g.read() gguts = g.read()
f.close() f.close()
g.close() g.close()
if fguts != gguts: if fguts != gguts:
raise ValueError("guts don't match") raise ValueError("guts don't match\n"
" correctpath=%r when=%r\n"
" cmd=%r" % (correctpath, when, cmd))
def mutatedb(db):
    # Apply 100 random mutations to the btree obtained from db.gettree(),
    # then commit the current transaction and close db.
    btree = db.gettree()
    for _ in range(100):
        if random.random() >= 0.6:
            # Deletion branch: drop the first key, if the tree has any.
            existing = btree.keys()
            if existing:
                del btree[existing[0]]
            continue
        # Insertion branch.  The value is drawn before the key because an
        # assignment evaluates its right-hand side before the subscript
        # target; drawing in this order keeps the random sequence unchanged.
        new_value = random.randrange(100000)
        new_key = random.randrange(100000)
        btree[new_key] = new_value
    get_transaction().commit()
    db.close()
def main(): def main():
cleanup() cleanup()
os.mkdir('backup') os.mkdir('backup')
d = OurDB() d = OurDB()
for dummy in range(100): # Every 9th time thru the loop, we save a full copy of Data.fs,
# and at the end we ensure we can reproduce those too.
saved_snapshots = [] # list of (name, time) pairs for copies.
for i in range(100):
# Make some mutations. # Make some mutations.
tree = d.gettree() mutatedb(d)
for dummy2 in range(100):
if random.random() < 0.6:
tree[random.randrange(100000)] = random.randrange(100000)
else:
keys = tree.keys()
if keys:
del tree[keys[0]]
get_transaction().commit()
d.close()
# Pack about each tenth time. # Pack about each tenth time.
if random.random() < 0.1: if random.random() < 0.1:
...@@ -109,12 +125,23 @@ def main(): ...@@ -109,12 +125,23 @@ def main():
else: else:
os.system(PYTHON + '../repozo.py -zvBQr backup -f Data.fs') os.system(PYTHON + '../repozo.py -zvBQr backup -f Data.fs')
if i % 9 == 0:
copytime = '%04d-%02d-%02d-%02d-%02d-%02d' % (time.gmtime()[:6])
copyname = os.path.join('backup', "Data%d" % i) + '.fs'
shutil.copyfile('Data.fs', copyname)
saved_snapshots.append((copyname, copytime))
# Make sure the clock moves at least a second. # Make sure the clock moves at least a second.
time.sleep(1.01) time.sleep(1.01)
# Verify current Data.fs can be reproduced exactly. # Verify current Data.fs can be reproduced exactly.
check() check()
# Verify snapshots can be reproduced exactly.
for copyname, copytime in saved_snapshots:
print "Checking that", copyname, "at", copytime, "is reproducible."
check(copyname, copytime)
# Tear it all down. # Tear it all down.
cleanup() cleanup()
print 'Test passed!' print 'Test passed!'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment